about summary refs log tree commit diff stats
path: root/fs/jbd2/transaction.c
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2009-01-10 20:42:53 -0500
committer Ingo Molnar <mingo@elte.hu> 2009-01-10 20:42:53 -0500
commit 506c10f26c481b7f8ef27c1c79290f68989b2e9e (patch)
tree 03de82e812f00957aa6276dac2fe51c3358e88d7 /fs/jbd2/transaction.c
parent e1df957670aef74ffd9a4ad93e6d2c90bf6b4845 (diff)
parent c59765042f53a79a7a65585042ff463b69cb248c (diff)
Merge commit 'v2.6.29-rc1' into perfcounters/core
Conflicts: include/linux/kernel_stat.h
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r-- fs/jbd2/transaction.c 107
1 files changed, 93 insertions, 14 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 39b7805a599a..46b4e347ed7d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -25,6 +25,7 @@
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h>
28 29
29static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 30static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
30 31
@@ -48,6 +49,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
48{ 49{
49 transaction->t_journal = journal; 50 transaction->t_journal = journal;
50 transaction->t_state = T_RUNNING; 51 transaction->t_state = T_RUNNING;
52 transaction->t_start_time = ktime_get();
51 transaction->t_tid = journal->j_transaction_sequence++; 53 transaction->t_tid = journal->j_transaction_sequence++;
52 transaction->t_expires = jiffies + journal->j_commit_interval; 54 transaction->t_expires = jiffies + journal->j_commit_interval;
53 spin_lock_init(&transaction->t_handle_lock); 55 spin_lock_init(&transaction->t_handle_lock);
@@ -741,6 +743,12 @@ done:
741 source = kmap_atomic(page, KM_USER0); 743 source = kmap_atomic(page, KM_USER0);
742 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); 744 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
743 kunmap_atomic(source, KM_USER0); 745 kunmap_atomic(source, KM_USER0);
746
747 /*
748 * Now that the frozen data is saved off, we need to store
749 * any matching triggers.
750 */
751 jh->b_frozen_triggers = jh->b_triggers;
744 } 752 }
745 jbd_unlock_bh_state(bh); 753 jbd_unlock_bh_state(bh);
746 754
@@ -944,6 +952,47 @@ out:
944} 952}
945 953
946/** 954/**
955 * void jbd2_journal_set_triggers() - Add triggers for commit writeout
956 * @bh: buffer to trigger on
957 * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
958 *
959 * Set any triggers on this journal_head. This is always safe, because
960 * triggers for a committing buffer will be saved off, and triggers for
961 * a running transaction will match the buffer in that transaction.
962 *
963 * Call with NULL to clear the triggers.
964 */
965void jbd2_journal_set_triggers(struct buffer_head *bh,
966 struct jbd2_buffer_trigger_type *type)
967{
968 struct journal_head *jh = bh2jh(bh);
969
970 jh->b_triggers = type;
971}
972
973void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
974 struct jbd2_buffer_trigger_type *triggers)
975{
976 struct buffer_head *bh = jh2bh(jh);
977
978 if (!triggers || !triggers->t_commit)
979 return;
980
981 triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
982}
983
984void jbd2_buffer_abort_trigger(struct journal_head *jh,
985 struct jbd2_buffer_trigger_type *triggers)
986{
987 if (!triggers || !triggers->t_abort)
988 return;
989
990 triggers->t_abort(triggers, jh2bh(jh));
991}
992
993
994
995/**
947 * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata 996 * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
948 * @handle: transaction to add buffer to. 997 * @handle: transaction to add buffer to.
949 * @bh: buffer to mark 998 * @bh: buffer to mark
@@ -1193,7 +1242,7 @@ int jbd2_journal_stop(handle_t *handle)
1193{ 1242{
1194 transaction_t *transaction = handle->h_transaction; 1243 transaction_t *transaction = handle->h_transaction;
1195 journal_t *journal = transaction->t_journal; 1244 journal_t *journal = transaction->t_journal;
1196 int old_handle_count, err; 1245 int err;
1197 pid_t pid; 1246 pid_t pid;
1198 1247
1199 J_ASSERT(journal_current_handle() == handle); 1248 J_ASSERT(journal_current_handle() == handle);
@@ -1216,24 +1265,54 @@ int jbd2_journal_stop(handle_t *handle)
1216 /* 1265 /*
1217 * Implement synchronous transaction batching. If the handle 1266 * Implement synchronous transaction batching. If the handle
1218 * was synchronous, don't force a commit immediately. Let's 1267 * was synchronous, don't force a commit immediately. Let's
1219 * yield and let another thread piggyback onto this transaction. 1268 * yield and let another thread piggyback onto this
1220 * Keep doing that while new threads continue to arrive. 1269 * transaction. Keep doing that while new threads continue to
1221 * It doesn't cost much - we're about to run a commit and sleep 1270 * arrive. It doesn't cost much - we're about to run a commit
1222 * on IO anyway. Speeds up many-threaded, many-dir operations 1271 * and sleep on IO anyway. Speeds up many-threaded, many-dir
1223 * by 30x or more... 1272 * operations by 30x or more...
1224 * 1273 *
1225 * But don't do this if this process was the most recent one to 1274 * We try and optimize the sleep time against what the
1226 * perform a synchronous write. We do this to detect the case where a 1275 * underlying disk can do, instead of having a static sleep
1227 * single process is doing a stream of sync writes. No point in waiting 1276 * time. This is useful for the case where our storage is so
1228 * for joiners in that case. 1277 * fast that it is more optimal to go ahead and force a flush
1278 * and wait for the transaction to be committed than it is to
1279 * wait for an arbitrary amount of time for new writers to
1280 * join the transaction. We achieve this by measuring how
1281 * long it takes to commit a transaction, and compare it with
1282 * how long this transaction has been running, and if run time
1283 * < commit time then we sleep for the delta and commit. This
1284 * greatly helps super fast disks that would see slowdowns as
1285 * more threads started doing fsyncs.
1286 *
1287 * But don't do this if this process was the most recent one
1288 * to perform a synchronous write. We do this to detect the
1289 * case where a single process is doing a stream of sync
1290 * writes. No point in waiting for joiners in that case.
1229 */ 1291 */
1230 pid = current->pid; 1292 pid = current->pid;
1231 if (handle->h_sync && journal->j_last_sync_writer != pid) { 1293 if (handle->h_sync && journal->j_last_sync_writer != pid) {
1294 u64 commit_time, trans_time;
1295
1232 journal->j_last_sync_writer = pid; 1296 journal->j_last_sync_writer = pid;
1233 do { 1297
1234 old_handle_count = transaction->t_handle_count; 1298 spin_lock(&journal->j_state_lock);
1235 schedule_timeout_uninterruptible(1); 1299 commit_time = journal->j_average_commit_time;
1236 } while (old_handle_count != transaction->t_handle_count); 1300 spin_unlock(&journal->j_state_lock);
1301
1302 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
1303 transaction->t_start_time));
1304
1305 commit_time = max_t(u64, commit_time,
1306 1000*journal->j_min_batch_time);
1307 commit_time = min_t(u64, commit_time,
1308 1000*journal->j_max_batch_time);
1309
1310 if (trans_time < commit_time) {
1311 ktime_t expires = ktime_add_ns(ktime_get(),
1312 commit_time);
1313 set_current_state(TASK_UNINTERRUPTIBLE);
1314 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
1315 }
1237 } 1316 }
1238 1317
1239 current->journal_info = NULL; 1318 current->journal_info = NULL;