about | summary | refs | log | tree | commit | diff | stats
path: root/fs/jbd2/transaction.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-01-08 20:14:59 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-01-08 20:14:59 -0500
commit 2150edc6c5cf00f7adb54538b9ea2a3e9cedca3f (patch)
tree f72a0d85e66f500b4cead348a231e3d3b9f357bc /fs/jbd2/transaction.c
parent cd764695b67386a81964f68e9c66efd9f13f4d29 (diff)
parent 4b905671d2ea09fd48fed72c581df17e40823f39 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (57 commits)
  jbd2: Fix oops in jbd2_journal_init_inode() on corrupted fs
  ext4: Remove "extents" mount option
  block: Add Kconfig help which notes that ext4 needs CONFIG_LBD
  ext4: Make printk's consistently prefixed with "EXT4-fs: "
  ext4: Add sanity checks for the superblock before mounting the filesystem
  ext4: Add mount option to set kjournald's I/O priority
  jbd2: Submit writes to the journal using WRITE_SYNC
  jbd2: Add pid and journal device name to the "kjournald2 starting" message
  ext4: Add markers for better debuggability
  ext4: Remove code to create the journal inode
  ext4: provide function to release metadata pages under memory pressure
  ext3: provide function to release metadata pages under memory pressure
  add releasepage hooks to block devices which can be used by file systems
  ext4: Fix s_dirty_blocks_counter if block allocation failed with nodelalloc
  ext4: Init the complete page while building buddy cache
  ext4: Don't allow new groups to be added during block allocation
  ext4: mark the blocks/inode bitmap beyond end of group as used
  ext4: Use new buffer_head flag to check uninit group bitmaps initialization
  ext4: Fix the race between read_inode_bitmap() and ext4_new_inode()
  ext4: code cleanup
  ...
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r-- fs/jbd2/transaction.c 60
1 files changed, 46 insertions, 14 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 4f925a4f3d05..46b4e347ed7d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -25,6 +25,7 @@
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h>
28 29
29static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 30static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
30 31
@@ -48,6 +49,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
48{ 49{
49 transaction->t_journal = journal; 50 transaction->t_journal = journal;
50 transaction->t_state = T_RUNNING; 51 transaction->t_state = T_RUNNING;
52 transaction->t_start_time = ktime_get();
51 transaction->t_tid = journal->j_transaction_sequence++; 53 transaction->t_tid = journal->j_transaction_sequence++;
52 transaction->t_expires = jiffies + journal->j_commit_interval; 54 transaction->t_expires = jiffies + journal->j_commit_interval;
53 spin_lock_init(&transaction->t_handle_lock); 55 spin_lock_init(&transaction->t_handle_lock);
@@ -1240,7 +1242,7 @@ int jbd2_journal_stop(handle_t *handle)
1240{ 1242{
1241 transaction_t *transaction = handle->h_transaction; 1243 transaction_t *transaction = handle->h_transaction;
1242 journal_t *journal = transaction->t_journal; 1244 journal_t *journal = transaction->t_journal;
1243 int old_handle_count, err; 1245 int err;
1244 pid_t pid; 1246 pid_t pid;
1245 1247
1246 J_ASSERT(journal_current_handle() == handle); 1248 J_ASSERT(journal_current_handle() == handle);
@@ -1263,24 +1265,54 @@ int jbd2_journal_stop(handle_t *handle)
1263 /* 1265 /*
1264 * Implement synchronous transaction batching. If the handle 1266 * Implement synchronous transaction batching. If the handle
1265 * was synchronous, don't force a commit immediately. Let's 1267 * was synchronous, don't force a commit immediately. Let's
1266 * yield and let another thread piggyback onto this transaction. 1268 * yield and let another thread piggyback onto this
1267 * Keep doing that while new threads continue to arrive. 1269 * transaction. Keep doing that while new threads continue to
1268 * It doesn't cost much - we're about to run a commit and sleep 1270 * arrive. It doesn't cost much - we're about to run a commit
1269 * on IO anyway. Speeds up many-threaded, many-dir operations 1271 * and sleep on IO anyway. Speeds up many-threaded, many-dir
1270 * by 30x or more... 1272 * operations by 30x or more...
1273 *
1274 * We try and optimize the sleep time against what the
1275 * underlying disk can do, instead of having a static sleep
1276 * time. This is useful for the case where our storage is so
1277 * fast that it is more optimal to go ahead and force a flush
1278 * and wait for the transaction to be committed than it is to
1279 * wait for an arbitrary amount of time for new writers to
1280 * join the transaction. We achieve this by measuring how
1281 * long it takes to commit a transaction, and compare it with
1282 * how long this transaction has been running, and if run time
1283 * < commit time then we sleep for the delta and commit. This
1284 * greatly helps super fast disks that would see slowdowns as
1285 * more threads started doing fsyncs.
1271 * 1286 *
1272 * But don't do this if this process was the most recent one to 1287 * But don't do this if this process was the most recent one
1273 * perform a synchronous write. We do this to detect the case where a 1288 * to perform a synchronous write. We do this to detect the
1274 * single process is doing a stream of sync writes. No point in waiting 1289 * case where a single process is doing a stream of sync
1275 * for joiners in that case. 1290 * writes. No point in waiting for joiners in that case.
1276 */ 1291 */
1277 pid = current->pid; 1292 pid = current->pid;
1278 if (handle->h_sync && journal->j_last_sync_writer != pid) { 1293 if (handle->h_sync && journal->j_last_sync_writer != pid) {
1294 u64 commit_time, trans_time;
1295
1279 journal->j_last_sync_writer = pid; 1296 journal->j_last_sync_writer = pid;
1280 do { 1297
1281 old_handle_count = transaction->t_handle_count; 1298 spin_lock(&journal->j_state_lock);
1282 schedule_timeout_uninterruptible(1); 1299 commit_time = journal->j_average_commit_time;
1283 } while (old_handle_count != transaction->t_handle_count); 1300 spin_unlock(&journal->j_state_lock);
1301
1302 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
1303 transaction->t_start_time));
1304
1305 commit_time = max_t(u64, commit_time,
1306 1000*journal->j_min_batch_time);
1307 commit_time = min_t(u64, commit_time,
1308 1000*journal->j_max_batch_time);
1309
1310 if (trans_time < commit_time) {
1311 ktime_t expires = ktime_add_ns(ktime_get(),
1312 commit_time);
1313 set_current_state(TASK_UNINTERRUPTIBLE);
1314 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
1315 }
1284 } 1316 }
1285 1317
1286 current->journal_info = NULL; 1318 current->journal_info = NULL;