diff options
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r-- | fs/jbd2/transaction.c | 107 |
1 files changed, 93 insertions, 14 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 39b7805a599a..46b4e347ed7d 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/timer.h> | 25 | #include <linux/timer.h> |
26 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/hrtimer.h> | ||
28 | 29 | ||
29 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 30 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
30 | 31 | ||
@@ -48,6 +49,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
48 | { | 49 | { |
49 | transaction->t_journal = journal; | 50 | transaction->t_journal = journal; |
50 | transaction->t_state = T_RUNNING; | 51 | transaction->t_state = T_RUNNING; |
52 | transaction->t_start_time = ktime_get(); | ||
51 | transaction->t_tid = journal->j_transaction_sequence++; | 53 | transaction->t_tid = journal->j_transaction_sequence++; |
52 | transaction->t_expires = jiffies + journal->j_commit_interval; | 54 | transaction->t_expires = jiffies + journal->j_commit_interval; |
53 | spin_lock_init(&transaction->t_handle_lock); | 55 | spin_lock_init(&transaction->t_handle_lock); |
@@ -741,6 +743,12 @@ done: | |||
741 | source = kmap_atomic(page, KM_USER0); | 743 | source = kmap_atomic(page, KM_USER0); |
742 | memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); | 744 | memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); |
743 | kunmap_atomic(source, KM_USER0); | 745 | kunmap_atomic(source, KM_USER0); |
746 | |||
747 | /* | ||
748 | * Now that the frozen data is saved off, we need to store | ||
749 | * any matching triggers. | ||
750 | */ | ||
751 | jh->b_frozen_triggers = jh->b_triggers; | ||
744 | } | 752 | } |
745 | jbd_unlock_bh_state(bh); | 753 | jbd_unlock_bh_state(bh); |
746 | 754 | ||
@@ -944,6 +952,47 @@ out: | |||
944 | } | 952 | } |
945 | 953 | ||
946 | /** | 954 | /** |
955 | * void jbd2_journal_set_triggers() - Add triggers for commit writeout | ||
956 | * @bh: buffer to trigger on | ||
957 | * @type: struct jbd2_buffer_trigger_type containing the trigger(s). | ||
958 | * | ||
959 | * Set any triggers on this journal_head. This is always safe, because | ||
960 | * triggers for a committing buffer will be saved off, and triggers for | ||
961 | * a running transaction will match the buffer in that transaction. | ||
962 | * | ||
963 | * Call with NULL to clear the triggers. | ||
964 | */ | ||
965 | void jbd2_journal_set_triggers(struct buffer_head *bh, | ||
966 | struct jbd2_buffer_trigger_type *type) | ||
967 | { | ||
968 | struct journal_head *jh = bh2jh(bh); | ||
969 | |||
970 | jh->b_triggers = type; | ||
971 | } | ||
972 | |||
973 | void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data, | ||
974 | struct jbd2_buffer_trigger_type *triggers) | ||
975 | { | ||
976 | struct buffer_head *bh = jh2bh(jh); | ||
977 | |||
978 | if (!triggers || !triggers->t_commit) | ||
979 | return; | ||
980 | |||
981 | triggers->t_commit(triggers, bh, mapped_data, bh->b_size); | ||
982 | } | ||
983 | |||
984 | void jbd2_buffer_abort_trigger(struct journal_head *jh, | ||
985 | struct jbd2_buffer_trigger_type *triggers) | ||
986 | { | ||
987 | if (!triggers || !triggers->t_abort) | ||
988 | return; | ||
989 | |||
990 | triggers->t_abort(triggers, jh2bh(jh)); | ||
991 | } | ||
992 | |||
993 | |||
994 | |||
995 | /** | ||
947 | * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata | 996 | * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata |
948 | * @handle: transaction to add buffer to. | 997 | * @handle: transaction to add buffer to. |
949 | * @bh: buffer to mark | 998 | * @bh: buffer to mark |
@@ -1193,7 +1242,7 @@ int jbd2_journal_stop(handle_t *handle) | |||
1193 | { | 1242 | { |
1194 | transaction_t *transaction = handle->h_transaction; | 1243 | transaction_t *transaction = handle->h_transaction; |
1195 | journal_t *journal = transaction->t_journal; | 1244 | journal_t *journal = transaction->t_journal; |
1196 | int old_handle_count, err; | 1245 | int err; |
1197 | pid_t pid; | 1246 | pid_t pid; |
1198 | 1247 | ||
1199 | J_ASSERT(journal_current_handle() == handle); | 1248 | J_ASSERT(journal_current_handle() == handle); |
@@ -1216,24 +1265,54 @@ int jbd2_journal_stop(handle_t *handle) | |||
1216 | /* | 1265 | /* |
1217 | * Implement synchronous transaction batching. If the handle | 1266 | * Implement synchronous transaction batching. If the handle |
1218 | * was synchronous, don't force a commit immediately. Let's | 1267 | * was synchronous, don't force a commit immediately. Let's |
1219 | * yield and let another thread piggyback onto this transaction. | 1268 | * yield and let another thread piggyback onto this |
1220 | * Keep doing that while new threads continue to arrive. | 1269 | * transaction. Keep doing that while new threads continue to |
1221 | * It doesn't cost much - we're about to run a commit and sleep | 1270 | * arrive. It doesn't cost much - we're about to run a commit |
1222 | * on IO anyway. Speeds up many-threaded, many-dir operations | 1271 | * and sleep on IO anyway. Speeds up many-threaded, many-dir |
1223 | * by 30x or more... | 1272 | * operations by 30x or more... |
1224 | * | 1273 | * |
1225 | * But don't do this if this process was the most recent one to | 1274 | * We try to optimize the sleep time against what the |
1226 | * perform a synchronous write. We do this to detect the case where a | 1275 | * underlying disk can do, instead of having a static sleep |
1227 | * single process is doing a stream of sync writes. No point in waiting | 1276 | * time. This is useful for the case where our storage is so |
1228 | * for joiners in that case. | 1277 | * fast that it is more optimal to go ahead and force a flush |
1278 | * and wait for the transaction to be committed than it is to | ||
1279 | * wait for an arbitrary amount of time for new writers to | ||
1280 | * join the transaction. We achieve this by measuring how | ||
1281 | * long it takes to commit a transaction, and comparing it with | |
1282 | * how long this transaction has been running, and if run time | ||
1283 | * < commit time then we sleep for the delta and commit. This | ||
1284 | * greatly helps super fast disks that would see slowdowns as | ||
1285 | * more threads started doing fsyncs. | ||
1286 | * | ||
1287 | * But don't do this if this process was the most recent one | ||
1288 | * to perform a synchronous write. We do this to detect the | ||
1289 | * case where a single process is doing a stream of sync | ||
1290 | * writes. No point in waiting for joiners in that case. | ||
1229 | */ | 1291 | */ |
1230 | pid = current->pid; | 1292 | pid = current->pid; |
1231 | if (handle->h_sync && journal->j_last_sync_writer != pid) { | 1293 | if (handle->h_sync && journal->j_last_sync_writer != pid) { |
1294 | u64 commit_time, trans_time; | ||
1295 | |||
1232 | journal->j_last_sync_writer = pid; | 1296 | journal->j_last_sync_writer = pid; |
1233 | do { | 1297 | |
1234 | old_handle_count = transaction->t_handle_count; | 1298 | spin_lock(&journal->j_state_lock); |
1235 | schedule_timeout_uninterruptible(1); | 1299 | commit_time = journal->j_average_commit_time; |
1236 | } while (old_handle_count != transaction->t_handle_count); | 1300 | spin_unlock(&journal->j_state_lock); |
1301 | |||
1302 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), | ||
1303 | transaction->t_start_time)); | ||
1304 | |||
1305 | commit_time = max_t(u64, commit_time, | ||
1306 | 1000*journal->j_min_batch_time); | ||
1307 | commit_time = min_t(u64, commit_time, | ||
1308 | 1000*journal->j_max_batch_time); | ||
1309 | |||
1310 | if (trans_time < commit_time) { | ||
1311 | ktime_t expires = ktime_add_ns(ktime_get(), | ||
1312 | commit_time); | ||
1313 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
1314 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
1315 | } | ||
1237 | } | 1316 | } |
1238 | 1317 | ||
1239 | current->journal_info = NULL; | 1318 | current->journal_info = NULL; |