diff options
-rw-r--r-- | fs/jbd2/commit.c | 14 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 58 | ||||
-rw-r--r-- | include/linux/jbd2.h | 15 |
3 files changed, 73 insertions, 14 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6393fd0d804..f22d1828ea8 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -355,6 +355,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
355 | int flags; | 355 | int flags; |
356 | int err; | 356 | int err; |
357 | unsigned long long blocknr; | 357 | unsigned long long blocknr; |
358 | ktime_t start_time; | ||
359 | u64 commit_time; | ||
358 | char *tagp = NULL; | 360 | char *tagp = NULL; |
359 | journal_header_t *header; | 361 | journal_header_t *header; |
360 | journal_block_tag_t *tag = NULL; | 362 | journal_block_tag_t *tag = NULL; |
@@ -481,6 +483,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
481 | commit_transaction->t_state = T_FLUSH; | 483 | commit_transaction->t_state = T_FLUSH; |
482 | journal->j_committing_transaction = commit_transaction; | 484 | journal->j_committing_transaction = commit_transaction; |
483 | journal->j_running_transaction = NULL; | 485 | journal->j_running_transaction = NULL; |
486 | start_time = ktime_get(); | ||
484 | commit_transaction->t_log_start = journal->j_head; | 487 | commit_transaction->t_log_start = journal->j_head; |
485 | wake_up(&journal->j_wait_transaction_locked); | 488 | wake_up(&journal->j_wait_transaction_locked); |
486 | spin_unlock(&journal->j_state_lock); | 489 | spin_unlock(&journal->j_state_lock); |
@@ -995,6 +998,17 @@ restart_loop: | |||
995 | J_ASSERT(commit_transaction == journal->j_committing_transaction); | 998 | J_ASSERT(commit_transaction == journal->j_committing_transaction); |
996 | journal->j_commit_sequence = commit_transaction->t_tid; | 999 | journal->j_commit_sequence = commit_transaction->t_tid; |
997 | journal->j_committing_transaction = NULL; | 1000 | journal->j_committing_transaction = NULL; |
1001 | commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); | ||
1002 | |||
1003 | /* | ||
1004 | * weight the running average higher than the latest commit time so we | ||
1005 | * don't react too strongly to vast changes in the commit time | ||
1006 | */ | ||
1007 | if (likely(journal->j_average_commit_time)) | ||
1008 | journal->j_average_commit_time = (commit_time + | ||
1009 | journal->j_average_commit_time*3) / 4; | ||
1010 | else | ||
1011 | journal->j_average_commit_time = commit_time; | ||
998 | spin_unlock(&journal->j_state_lock); | 1012 | spin_unlock(&journal->j_state_lock); |
999 | 1013 | ||
1000 | if (journal->j_commit_callback) | 1014 | if (journal->j_commit_callback) |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 39b7805a599..13dcbc990f4 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/timer.h> | 25 | #include <linux/timer.h> |
26 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/hrtimer.h> | ||
28 | 29 | ||
29 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 30 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
30 | 31 | ||
@@ -48,6 +49,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
48 | { | 49 | { |
49 | transaction->t_journal = journal; | 50 | transaction->t_journal = journal; |
50 | transaction->t_state = T_RUNNING; | 51 | transaction->t_state = T_RUNNING; |
52 | transaction->t_start_time = ktime_get(); | ||
51 | transaction->t_tid = journal->j_transaction_sequence++; | 53 | transaction->t_tid = journal->j_transaction_sequence++; |
52 | transaction->t_expires = jiffies + journal->j_commit_interval; | 54 | transaction->t_expires = jiffies + journal->j_commit_interval; |
53 | spin_lock_init(&transaction->t_handle_lock); | 55 | spin_lock_init(&transaction->t_handle_lock); |
@@ -1193,7 +1195,7 @@ int jbd2_journal_stop(handle_t *handle) | |||
1193 | { | 1195 | { |
1194 | transaction_t *transaction = handle->h_transaction; | 1196 | transaction_t *transaction = handle->h_transaction; |
1195 | journal_t *journal = transaction->t_journal; | 1197 | journal_t *journal = transaction->t_journal; |
1196 | int old_handle_count, err; | 1198 | int err; |
1197 | pid_t pid; | 1199 | pid_t pid; |
1198 | 1200 | ||
1199 | J_ASSERT(journal_current_handle() == handle); | 1201 | J_ASSERT(journal_current_handle() == handle); |
@@ -1216,24 +1218,52 @@ int jbd2_journal_stop(handle_t *handle) | |||
1216 | /* | 1218 | /* |
1217 | * Implement synchronous transaction batching. If the handle | 1219 | * Implement synchronous transaction batching. If the handle |
1218 | * was synchronous, don't force a commit immediately. Let's | 1220 | * was synchronous, don't force a commit immediately. Let's |
1219 | * yield and let another thread piggyback onto this transaction. | 1221 | * yield and let another thread piggyback onto this |
1220 | * Keep doing that while new threads continue to arrive. | 1222 | * transaction. Keep doing that while new threads continue to |
1221 | * It doesn't cost much - we're about to run a commit and sleep | 1223 | * arrive. It doesn't cost much - we're about to run a commit |
1222 | * on IO anyway. Speeds up many-threaded, many-dir operations | 1224 | * and sleep on IO anyway. Speeds up many-threaded, many-dir |
1223 | * by 30x or more... | 1225 | * operations by 30x or more... |
1226 | * | ||
1227 | * We try and optimize the sleep time against what the | ||
1228 | * underlying disk can do, instead of having a static sleep | ||
1229 | * time. This is useful for the case where our storage is so | ||
1230 | * fast that it is more optimal to go ahead and force a flush | ||
1231 | * and wait for the transaction to be committed than it is to | ||
1232 | * wait for an arbitrary amount of time for new writers to | ||
1233 | * join the transaction. We achieve this by measuring how | ||
1234 | * long it takes to commit a transaction, and compare it with | ||
1235 | * how long this transaction has been running, and if run time | ||
1236 | * < commit time then we sleep for the average commit time (capped at one jiffy) and commit. This | ||
1237 | * greatly helps super fast disks that would see slowdowns as | ||
1238 | * more threads started doing fsyncs. | ||
1224 | * | 1239 | * |
1225 | * But don't do this if this process was the most recent one to | 1240 | * But don't do this if this process was the most recent one |
1226 | * perform a synchronous write. We do this to detect the case where a | 1241 | * to perform a synchronous write. We do this to detect the |
1227 | * single process is doing a stream of sync writes. No point in waiting | 1242 | * case where a single process is doing a stream of sync |
1228 | * for joiners in that case. | 1243 | * writes. No point in waiting for joiners in that case. |
1229 | */ | 1244 | */ |
1230 | pid = current->pid; | 1245 | pid = current->pid; |
1231 | if (handle->h_sync && journal->j_last_sync_writer != pid) { | 1246 | if (handle->h_sync && journal->j_last_sync_writer != pid) { |
1247 | u64 commit_time, trans_time; | ||
1248 | |||
1232 | journal->j_last_sync_writer = pid; | 1249 | journal->j_last_sync_writer = pid; |
1233 | do { | 1250 | |
1234 | old_handle_count = transaction->t_handle_count; | 1251 | spin_lock(&journal->j_state_lock); |
1235 | schedule_timeout_uninterruptible(1); | 1252 | commit_time = journal->j_average_commit_time; |
1236 | } while (old_handle_count != transaction->t_handle_count); | 1253 | spin_unlock(&journal->j_state_lock); |
1254 | |||
1255 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), | ||
1256 | transaction->t_start_time)); | ||
1257 | |||
1258 | commit_time = min_t(u64, commit_time, | ||
1259 | 1000*jiffies_to_usecs(1)); | ||
1260 | |||
1261 | if (trans_time < commit_time) { | ||
1262 | ktime_t expires = ktime_add_ns(ktime_get(), | ||
1263 | commit_time); | ||
1264 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
1265 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
1266 | } | ||
1237 | } | 1267 | } |
1238 | 1268 | ||
1239 | current->journal_info = NULL; | 1269 | current->journal_info = NULL; |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f3664574548..ab8cef130c2 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -638,6 +638,11 @@ struct transaction_s | |||
638 | unsigned long t_expires; | 638 | unsigned long t_expires; |
639 | 639 | ||
640 | /* | 640 | /* |
641 | * When this transaction started, in nanoseconds [no locking] | ||
642 | */ | ||
643 | ktime_t t_start_time; | ||
644 | |||
645 | /* | ||
641 | * How many handles used this transaction? [t_handle_lock] | 646 | * How many handles used this transaction? [t_handle_lock] |
642 | */ | 647 | */ |
643 | int t_handle_count; | 648 | int t_handle_count; |
@@ -939,8 +944,18 @@ struct journal_s | |||
939 | struct buffer_head **j_wbuf; | 944 | struct buffer_head **j_wbuf; |
940 | int j_wbufsize; | 945 | int j_wbufsize; |
941 | 946 | ||
947 | /* | ||
948 | * this is the pid of the last person to run a synchronous operation | ||
949 | * through the journal | ||
950 | */ | ||
942 | pid_t j_last_sync_writer; | 951 | pid_t j_last_sync_writer; |
943 | 952 | ||
953 | /* | ||
954 | * the average amount of time in nanoseconds it takes to commit a | ||
955 | * transaction to disk. [j_state_lock] | ||
956 | */ | ||
957 | u64 j_average_commit_time; | ||
958 | |||
944 | /* This function is called when a transaction is closed */ | 959 | /* This function is called when a transaction is closed */ |
945 | void (*j_commit_callback)(journal_t *, | 960 | void (*j_commit_callback)(journal_t *, |
946 | transaction_t *); | 961 | transaction_t *); |