diff options
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r-- | fs/jbd2/transaction.c | 60 |
1 files changed, 46 insertions, 14 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 4f925a4f3d05..46b4e347ed7d 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/timer.h> | 25 | #include <linux/timer.h> |
26 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/hrtimer.h> | ||
28 | 29 | ||
29 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 30 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
30 | 31 | ||
@@ -48,6 +49,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
48 | { | 49 | { |
49 | transaction->t_journal = journal; | 50 | transaction->t_journal = journal; |
50 | transaction->t_state = T_RUNNING; | 51 | transaction->t_state = T_RUNNING; |
52 | transaction->t_start_time = ktime_get(); | ||
51 | transaction->t_tid = journal->j_transaction_sequence++; | 53 | transaction->t_tid = journal->j_transaction_sequence++; |
52 | transaction->t_expires = jiffies + journal->j_commit_interval; | 54 | transaction->t_expires = jiffies + journal->j_commit_interval; |
53 | spin_lock_init(&transaction->t_handle_lock); | 55 | spin_lock_init(&transaction->t_handle_lock); |
@@ -1240,7 +1242,7 @@ int jbd2_journal_stop(handle_t *handle) | |||
1240 | { | 1242 | { |
1241 | transaction_t *transaction = handle->h_transaction; | 1243 | transaction_t *transaction = handle->h_transaction; |
1242 | journal_t *journal = transaction->t_journal; | 1244 | journal_t *journal = transaction->t_journal; |
1243 | int old_handle_count, err; | 1245 | int err; |
1244 | pid_t pid; | 1246 | pid_t pid; |
1245 | 1247 | ||
1246 | J_ASSERT(journal_current_handle() == handle); | 1248 | J_ASSERT(journal_current_handle() == handle); |
@@ -1263,24 +1265,54 @@ int jbd2_journal_stop(handle_t *handle) | |||
1263 | /* | 1265 | /* |
1264 | * Implement synchronous transaction batching. If the handle | 1266 | * Implement synchronous transaction batching. If the handle |
1265 | * was synchronous, don't force a commit immediately. Let's | 1267 | * was synchronous, don't force a commit immediately. Let's |
1266 | * yield and let another thread piggyback onto this transaction. | 1268 | * yield and let another thread piggyback onto this |
1267 | * Keep doing that while new threads continue to arrive. | 1269 | * transaction. Keep doing that while new threads continue to |
1268 | * It doesn't cost much - we're about to run a commit and sleep | 1270 | * arrive. It doesn't cost much - we're about to run a commit |
1269 | * on IO anyway. Speeds up many-threaded, many-dir operations | 1271 | * and sleep on IO anyway. Speeds up many-threaded, many-dir |
1270 | * by 30x or more... | 1272 | * operations by 30x or more... |
1273 | * | ||
1274 | * We try and optimize the sleep time against what the | ||
1275 | * underlying disk can do, instead of having a static sleep | ||
1276 | * time. This is useful for the case where our storage is so | ||
1277 | * fast that it is more optimal to go ahead and force a flush | ||
1278 | * and wait for the transaction to be committed than it is to | ||
1279 | * wait for an arbitrary amount of time for new writers to | ||
1280 | * join the transaction. We achieve this by measuring how | ||
1281 | * long it takes to commit a transaction, and compare it with | ||
1282 | * how long this transaction has been running, and if run time | ||
1283 | * < commit time then we sleep for the delta and commit. This | ||
1284 | * greatly helps super fast disks that would see slowdowns as | ||
1285 | * more threads started doing fsyncs. | ||
1271 | * | 1286 | * |
1272 | * But don't do this if this process was the most recent one to | 1287 | * But don't do this if this process was the most recent one |
1273 | * perform a synchronous write. We do this to detect the case where a | 1288 | * to perform a synchronous write. We do this to detect the |
1274 | * single process is doing a stream of sync writes. No point in waiting | 1289 | * case where a single process is doing a stream of sync |
1275 | * for joiners in that case. | 1290 | * writes. No point in waiting for joiners in that case. |
1276 | */ | 1291 | */ |
1277 | pid = current->pid; | 1292 | pid = current->pid; |
1278 | if (handle->h_sync && journal->j_last_sync_writer != pid) { | 1293 | if (handle->h_sync && journal->j_last_sync_writer != pid) { |
1294 | u64 commit_time, trans_time; | ||
1295 | |||
1279 | journal->j_last_sync_writer = pid; | 1296 | journal->j_last_sync_writer = pid; |
1280 | do { | 1297 | |
1281 | old_handle_count = transaction->t_handle_count; | 1298 | spin_lock(&journal->j_state_lock); |
1282 | schedule_timeout_uninterruptible(1); | 1299 | commit_time = journal->j_average_commit_time; |
1283 | } while (old_handle_count != transaction->t_handle_count); | 1300 | spin_unlock(&journal->j_state_lock); |
1301 | |||
1302 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), | ||
1303 | transaction->t_start_time)); | ||
1304 | |||
1305 | commit_time = max_t(u64, commit_time, | ||
1306 | 1000*journal->j_min_batch_time); | ||
1307 | commit_time = min_t(u64, commit_time, | ||
1308 | 1000*journal->j_max_batch_time); | ||
1309 | |||
1310 | if (trans_time < commit_time) { | ||
1311 | ktime_t expires = ktime_add_ns(ktime_get(), | ||
1312 | commit_time); | ||
1313 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
1314 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
1315 | } | ||
1284 | } | 1316 | } |
1285 | 1317 | ||
1286 | current->journal_info = NULL; | 1318 | current->journal_info = NULL; |