aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2/transaction.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r--fs/jbd2/transaction.c60
1 files changed, 46 insertions, 14 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 4f925a4f3d05..46b4e347ed7d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -25,6 +25,7 @@
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h>
28 29
29static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 30static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
30 31
@@ -48,6 +49,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
48{ 49{
49 transaction->t_journal = journal; 50 transaction->t_journal = journal;
50 transaction->t_state = T_RUNNING; 51 transaction->t_state = T_RUNNING;
52 transaction->t_start_time = ktime_get();
51 transaction->t_tid = journal->j_transaction_sequence++; 53 transaction->t_tid = journal->j_transaction_sequence++;
52 transaction->t_expires = jiffies + journal->j_commit_interval; 54 transaction->t_expires = jiffies + journal->j_commit_interval;
53 spin_lock_init(&transaction->t_handle_lock); 55 spin_lock_init(&transaction->t_handle_lock);
@@ -1240,7 +1242,7 @@ int jbd2_journal_stop(handle_t *handle)
1240{ 1242{
1241 transaction_t *transaction = handle->h_transaction; 1243 transaction_t *transaction = handle->h_transaction;
1242 journal_t *journal = transaction->t_journal; 1244 journal_t *journal = transaction->t_journal;
1243 int old_handle_count, err; 1245 int err;
1244 pid_t pid; 1246 pid_t pid;
1245 1247
1246 J_ASSERT(journal_current_handle() == handle); 1248 J_ASSERT(journal_current_handle() == handle);
@@ -1263,24 +1265,54 @@ int jbd2_journal_stop(handle_t *handle)
1263 /* 1265 /*
1264 * Implement synchronous transaction batching. If the handle 1266 * Implement synchronous transaction batching. If the handle
1265 * was synchronous, don't force a commit immediately. Let's 1267 * was synchronous, don't force a commit immediately. Let's
1266 * yield and let another thread piggyback onto this transaction. 1268 * yield and let another thread piggyback onto this
1267 * Keep doing that while new threads continue to arrive. 1269 * transaction. Keep doing that while new threads continue to
1268 * It doesn't cost much - we're about to run a commit and sleep 1270 * arrive. It doesn't cost much - we're about to run a commit
1269 * on IO anyway. Speeds up many-threaded, many-dir operations 1271 * and sleep on IO anyway. Speeds up many-threaded, many-dir
1270 * by 30x or more... 1272 * operations by 30x or more...
1273 *
1274 * We try and optimize the sleep time against what the
1275 * underlying disk can do, instead of having a static sleep
1276 * time. This is useful for the case where our storage is so
1277 * fast that it is more optimal to go ahead and force a flush
1278 * and wait for the transaction to be committed than it is to
1279 * wait for an arbitrary amount of time for new writers to
1280 * join the transaction. We achieve this by measuring how
1281 * long it takes to commit a transaction, and compare it with
1282 * how long this transaction has been running, and if run time
1283 * < commit time then we sleep for the delta and commit. This
1284 * greatly helps super fast disks that would see slowdowns as
1285 * more threads started doing fsyncs.
1271 * 1286 *
1272 * But don't do this if this process was the most recent one to 1287 * But don't do this if this process was the most recent one
1273 * perform a synchronous write. We do this to detect the case where a 1288 * to perform a synchronous write. We do this to detect the
1274 * single process is doing a stream of sync writes. No point in waiting 1289 * case where a single process is doing a stream of sync
1275 * for joiners in that case. 1290 * writes. No point in waiting for joiners in that case.
1276 */ 1291 */
1277 pid = current->pid; 1292 pid = current->pid;
1278 if (handle->h_sync && journal->j_last_sync_writer != pid) { 1293 if (handle->h_sync && journal->j_last_sync_writer != pid) {
1294 u64 commit_time, trans_time;
1295
1279 journal->j_last_sync_writer = pid; 1296 journal->j_last_sync_writer = pid;
1280 do { 1297
1281 old_handle_count = transaction->t_handle_count; 1298 spin_lock(&journal->j_state_lock);
1282 schedule_timeout_uninterruptible(1); 1299 commit_time = journal->j_average_commit_time;
1283 } while (old_handle_count != transaction->t_handle_count); 1300 spin_unlock(&journal->j_state_lock);
1301
1302 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
1303 transaction->t_start_time));
1304
1305 commit_time = max_t(u64, commit_time,
1306 1000*journal->j_min_batch_time);
1307 commit_time = min_t(u64, commit_time,
1308 1000*journal->j_max_batch_time);
1309
1310 if (trans_time < commit_time) {
1311 ktime_t expires = ktime_add_ns(ktime_get(),
1312 commit_time);
1313 set_current_state(TASK_UNINTERRUPTIBLE);
1314 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
1315 }
1284 } 1316 }
1285 1317
1286 current->journal_info = NULL; 1318 current->journal_info = NULL;