Diffstat (limited to 'fs/jbd2')
 fs/jbd2/checkpoint.c  |  24
 fs/jbd2/commit.c      |  67
 fs/jbd2/journal.c     | 143
 fs/jbd2/transaction.c | 107
 4 files changed, 217 insertions, 124 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 9497718fe920..17159cacbd9e 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -249,16 +249,14 @@ restart:
 	return ret;
 }
 
-#define NR_BATCH	64
-
 static void
-__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
+__flush_batch(journal_t *journal, int *batch_count)
 {
 	int i;
 
-	ll_rw_block(SWRITE, *batch_count, bhs);
+	ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs);
 	for (i = 0; i < *batch_count; i++) {
-		struct buffer_head *bh = bhs[i];
+		struct buffer_head *bh = journal->j_chkpt_bhs[i];
 		clear_buffer_jwrite(bh);
 		BUFFER_TRACE(bh, "brelse");
 		__brelse(bh);
@@ -277,8 +275,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
 static int __process_buffer(journal_t *journal, struct journal_head *jh,
-			    struct buffer_head **bhs, int *batch_count,
-			    transaction_t *transaction)
+			    int *batch_count, transaction_t *transaction)
 {
 	struct buffer_head *bh = jh2bh(jh);
 	int ret = 0;
@@ -325,14 +322,14 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 		get_bh(bh);
 		J_ASSERT_BH(bh, !buffer_jwrite(bh));
 		set_buffer_jwrite(bh);
-		bhs[*batch_count] = bh;
+		journal->j_chkpt_bhs[*batch_count] = bh;
 		__buffer_relink_io(jh);
 		jbd_unlock_bh_state(bh);
 		transaction->t_chp_stats.cs_written++;
 		(*batch_count)++;
-		if (*batch_count == NR_BATCH) {
+		if (*batch_count == JBD2_NR_BATCH) {
 			spin_unlock(&journal->j_list_lock);
-			__flush_batch(journal, bhs, batch_count);
+			__flush_batch(journal, batch_count);
 			ret = 1;
 		}
 	}
@@ -388,7 +385,6 @@ restart:
 	if (journal->j_checkpoint_transactions == transaction &&
 	    transaction->t_tid == this_tid) {
 		int batch_count = 0;
-		struct buffer_head *bhs[NR_BATCH];
 		struct journal_head *jh;
 		int retry = 0, err;
 
@@ -402,7 +398,7 @@ restart:
 				retry = 1;
 				break;
 			}
-			retry = __process_buffer(journal, jh, bhs, &batch_count,
+			retry = __process_buffer(journal, jh, &batch_count,
 						 transaction);
 			if (retry < 0 && !result)
 				result = retry;
@@ -419,7 +415,7 @@ restart:
 				spin_unlock(&journal->j_list_lock);
 				retry = 1;
 			}
-			__flush_batch(journal, bhs, &batch_count);
+			__flush_batch(journal, &batch_count);
 		}
 
 		if (retry) {
@@ -686,6 +682,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	   safely remove this transaction from the log */
 
 	__jbd2_journal_drop_transaction(journal, transaction);
+	kfree(transaction);
 
 	/* Just in case anybody was waiting for more transactions to be
 	   checkpointed... */
@@ -760,5 +757,4 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
 	J_ASSERT(journal->j_running_transaction != transaction);
 
 	jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
-	kfree(transaction);
 }
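The hunks above only show the fs/jbd2 users of the new journal->j_chkpt_bhs array and of JBD2_NR_BATCH; their definitions live in include/linux/jbd2.h, which is outside this diffstat. A rough sketch of what the header side presumably looks like: the 64-entry scratch array moves off the checkpoint code's kernel stack and into the long-lived journal_t, which is safe because checkpointing is serialized by j_checkpoint_mutex (see the jbd2_journal_destroy() hunk in journal.c further down, which now takes that mutex around jbd2_log_do_checkpoint()). Everything here is an assumption except the two identifiers, which the checkpoint.c hunks reference directly.

    struct buffer_head;	/* a forward declaration suffices for a pointer array */

    /* Assumed include/linux/jbd2.h side of this change (not in this diff). */
    #define JBD2_NR_BATCH	64

    struct journal_s {
    	/* ... the many existing journal_t fields are omitted here ... */

    	/*
    	 * Scratch buffers collected by __process_buffer() and submitted by
    	 * __flush_batch().  Sixty-four pointers are ~512 bytes on a 64-bit
    	 * build, which is a lot of kernel stack for jbd2_log_do_checkpoint();
    	 * a field in journal_t works because only one checkpointer runs at
    	 * a time (j_checkpoint_mutex).
    	 */
    	struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH];
    };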
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index ebc667bc54a8..62804e57a44c 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -25,6 +25,7 @@
 #include <linux/crc32.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
+#include <linux/bio.h>
 
 /*
  * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -137,7 +138,7 @@ static int journal_submit_commit_record(journal_t *journal,
 		set_buffer_ordered(bh);
 		barrier_done = 1;
 	}
-	ret = submit_bh(WRITE, bh);
+	ret = submit_bh(WRITE_SYNC, bh);
 	if (barrier_done)
 		clear_buffer_ordered(bh);
 
@@ -158,7 +159,7 @@ static int journal_submit_commit_record(journal_t *journal,
 		lock_buffer(bh);
 		set_buffer_uptodate(bh);
 		clear_buffer_dirty(bh);
-		ret = submit_bh(WRITE, bh);
+		ret = submit_bh(WRITE_SYNC, bh);
 	}
 	*cbh = bh;
 	return ret;
@@ -168,12 +169,34 @@ static int journal_submit_commit_record(journal_t *journal,
  * This function along with journal_submit_commit_record
  * allows to write the commit record asynchronously.
  */
-static int journal_wait_on_commit_record(struct buffer_head *bh)
+static int journal_wait_on_commit_record(journal_t *journal,
+					 struct buffer_head *bh)
 {
 	int ret = 0;
 
+retry:
 	clear_buffer_dirty(bh);
 	wait_on_buffer(bh);
+	if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
+		printk(KERN_WARNING
+		       "JBD2: wait_on_commit_record: sync failed on %s - "
+		       "disabling barriers\n", journal->j_devname);
+		spin_lock(&journal->j_state_lock);
+		journal->j_flags &= ~JBD2_BARRIER;
+		spin_unlock(&journal->j_state_lock);
+
+		lock_buffer(bh);
+		clear_buffer_dirty(bh);
+		set_buffer_uptodate(bh);
+		bh->b_end_io = journal_end_buffer_io_sync;
+
+		ret = submit_bh(WRITE_SYNC, bh);
+		if (ret) {
+			unlock_buffer(bh);
+			return ret;
+		}
+		goto retry;
+	}
 
 	if (unlikely(!buffer_uptodate(bh)))
 		ret = -EIO;
@@ -332,13 +355,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	int flags;
 	int err;
 	unsigned long long blocknr;
+	ktime_t start_time;
+	u64 commit_time;
 	char *tagp = NULL;
 	journal_header_t *header;
 	journal_block_tag_t *tag = NULL;
 	int space_left = 0;
 	int first_tag = 0;
 	int tag_flag;
-	int i;
+	int i, to_free = 0;
 	int tag_bytes = journal_tag_bytes(journal);
 	struct buffer_head *cbh = NULL; /* For transactional checksums */
 	__u32 crc32_sum = ~0;
@@ -458,6 +483,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	commit_transaction->t_state = T_FLUSH;
 	journal->j_committing_transaction = commit_transaction;
 	journal->j_running_transaction = NULL;
+	start_time = ktime_get();
 	commit_transaction->t_log_start = journal->j_head;
 	wake_up(&journal->j_wait_transaction_locked);
 	spin_unlock(&journal->j_state_lock);
@@ -509,6 +535,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		if (is_journal_aborted(journal)) {
 			clear_buffer_jbddirty(jh2bh(jh));
 			JBUFFER_TRACE(jh, "journal is aborting: refile");
+			jbd2_buffer_abort_trigger(jh,
+						  jh->b_frozen_data ?
+						  jh->b_frozen_triggers :
+						  jh->b_triggers);
 			jbd2_journal_refile_buffer(journal, jh);
 			/* If that was the last one, we need to clean up
 			 * any descriptor buffers which may have been
@@ -799,7 +829,7 @@ wait_for_iobuf:
 		__jbd2_journal_abort_hard(journal);
 	}
 	if (!err && !is_journal_aborted(journal))
-		err = journal_wait_on_commit_record(cbh);
+		err = journal_wait_on_commit_record(journal, cbh);
 
 	if (err)
 		jbd2_journal_abort(journal, err);
@@ -844,6 +874,9 @@ restart_loop:
 		 * data.
 		 *
 		 * Otherwise, we can just throw away the frozen data now.
+		 *
+		 * We also know that the frozen data has already fired
+		 * its triggers if they exist, so we can clear that too.
 		 */
 		if (jh->b_committed_data) {
 			jbd2_free(jh->b_committed_data, bh->b_size);
@@ -851,10 +884,12 @@ restart_loop:
 			if (jh->b_frozen_data) {
 				jh->b_committed_data = jh->b_frozen_data;
 				jh->b_frozen_data = NULL;
+				jh->b_frozen_triggers = NULL;
 			}
 		} else if (jh->b_frozen_data) {
 			jbd2_free(jh->b_frozen_data, bh->b_size);
 			jh->b_frozen_data = NULL;
+			jh->b_frozen_triggers = NULL;
 		}
 
 		spin_lock(&journal->j_list_lock);
@@ -972,14 +1007,23 @@ restart_loop:
 	J_ASSERT(commit_transaction == journal->j_committing_transaction);
 	journal->j_commit_sequence = commit_transaction->t_tid;
 	journal->j_committing_transaction = NULL;
-	spin_unlock(&journal->j_state_lock);
+	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
 
-	if (journal->j_commit_callback)
-		journal->j_commit_callback(journal, commit_transaction);
+	/*
+	 * weight the commit time higher than the average time so we don't
+	 * react too strongly to vast changes in the commit time
+	 */
+	if (likely(journal->j_average_commit_time))
+		journal->j_average_commit_time = (commit_time +
+				journal->j_average_commit_time*3) / 4;
+	else
+		journal->j_average_commit_time = commit_time;
+	spin_unlock(&journal->j_state_lock);
 
 	if (commit_transaction->t_checkpoint_list == NULL &&
 	    commit_transaction->t_checkpoint_io_list == NULL) {
 		__jbd2_journal_drop_transaction(journal, commit_transaction);
+		to_free = 1;
 	} else {
 		if (journal->j_checkpoint_transactions == NULL) {
 			journal->j_checkpoint_transactions = commit_transaction;
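The j_average_commit_time maintained above is a simple weighted moving average in nanoseconds: each new commit contributes one quarter, the old average three quarters, so one unusually slow commit does not swing the estimate much. A stand-alone illustration of that behaviour (user-space C, made-up sample values; j_average_commit_time itself is a new journal_t field whose header declaration is not shown in this diff):

    #include <stdio.h>
    #include <stdint.h>

    /* Same update rule as the hunk above: new_avg = (sample + 3*old_avg) / 4,
     * everything in nanoseconds.  Illustration only; the values are made up. */
    static uint64_t update_avg(uint64_t avg, uint64_t sample)
    {
    	return avg ? (sample + avg * 3) / 4 : sample;
    }

    int main(void)
    {
    	uint64_t avg = 0;
    	/* nine fast 2ms commits with one slow 50ms commit in the middle */
    	uint64_t samples[] = { 2000000, 2000000, 2000000, 2000000, 50000000,
    			       2000000, 2000000, 2000000, 2000000, 2000000 };

    	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
    		avg = update_avg(avg, samples[i]);
    		printf("commit %u: sample %llu ns, average %llu ns\n", i,
    		       (unsigned long long)samples[i], (unsigned long long)avg);
    	}
    	/* the single 50ms outlier lifts the average once, then it decays
    	 * geometrically back toward 2ms */
    	return 0;
    }

jbd2_journal_stop() in transaction.c (further down) reads this average under j_state_lock to decide how long a synchronous handle should wait for other writers to join the transaction.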
@@ -998,11 +1042,16 @@ restart_loop:
 	}
 	spin_unlock(&journal->j_list_lock);
 
+	if (journal->j_commit_callback)
+		journal->j_commit_callback(journal, commit_transaction);
+
 	trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
-		   journal->j_devname, journal->j_commit_sequence,
+		   journal->j_devname, commit_transaction->t_tid,
 		   journal->j_tail_sequence);
 	jbd_debug(1, "JBD: commit %d complete, head %d\n",
 		  journal->j_commit_sequence, journal->j_tail_sequence);
+	if (to_free)
+		kfree(commit_transaction);
 
 	wake_up(&journal->j_wait_done_commit);
 }
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e70d657a19f8..56675306ed81 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -40,6 +40,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
+#include <asm/div64.h>
 
 EXPORT_SYMBOL(jbd2_journal_start);
 EXPORT_SYMBOL(jbd2_journal_restart);
@@ -50,6 +51,7 @@ EXPORT_SYMBOL(jbd2_journal_unlock_updates);
 EXPORT_SYMBOL(jbd2_journal_get_write_access);
 EXPORT_SYMBOL(jbd2_journal_get_create_access);
 EXPORT_SYMBOL(jbd2_journal_get_undo_access);
+EXPORT_SYMBOL(jbd2_journal_set_triggers);
 EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
 EXPORT_SYMBOL(jbd2_journal_release_buffer);
 EXPORT_SYMBOL(jbd2_journal_forget);
@@ -65,7 +67,6 @@ EXPORT_SYMBOL(jbd2_journal_update_format);
 EXPORT_SYMBOL(jbd2_journal_check_used_features);
 EXPORT_SYMBOL(jbd2_journal_check_available_features);
 EXPORT_SYMBOL(jbd2_journal_set_features);
-EXPORT_SYMBOL(jbd2_journal_create);
 EXPORT_SYMBOL(jbd2_journal_load);
 EXPORT_SYMBOL(jbd2_journal_destroy);
 EXPORT_SYMBOL(jbd2_journal_abort);
@@ -131,8 +132,9 @@ static int kjournald2(void *arg)
 	journal->j_task = current;
 	wake_up(&journal->j_wait_done_commit);
 
-	printk(KERN_INFO "kjournald2 starting. Commit interval %ld seconds\n",
-			journal->j_commit_interval / HZ);
+	printk(KERN_INFO "kjournald2 starting: pid %d, dev %s, "
+	       "commit interval %ld seconds\n", current->pid,
+	       journal->j_devname, journal->j_commit_interval / HZ);
 
 	/*
 	 * And now, wait forever for commit wakeup events.
@@ -290,6 +292,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	struct page *new_page;
 	unsigned int new_offset;
 	struct buffer_head *bh_in = jh2bh(jh_in);
+	struct jbd2_buffer_trigger_type *triggers;
 
 	/*
 	 * The buffer really shouldn't be locked: only the current committing
@@ -314,13 +317,23 @@ repeat:
 		done_copy_out = 1;
 		new_page = virt_to_page(jh_in->b_frozen_data);
 		new_offset = offset_in_page(jh_in->b_frozen_data);
+		triggers = jh_in->b_frozen_triggers;
 	} else {
 		new_page = jh2bh(jh_in)->b_page;
 		new_offset = offset_in_page(jh2bh(jh_in)->b_data);
+		triggers = jh_in->b_triggers;
 	}
 
 	mapped_data = kmap_atomic(new_page, KM_USER0);
 	/*
+	 * Fire any commit trigger.  Do this before checking for escaping,
+	 * as the trigger may modify the magic offset.  If a copy-out
+	 * happens afterwards, it will have the correct data in the buffer.
+	 */
+	jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
+				   triggers);
+
+	/*
 	 * Check for escaping
 	 */
 	if (*((__be32 *)(mapped_data + new_offset)) ==
@@ -352,6 +365,13 @@ repeat:
 		new_page = virt_to_page(tmp);
 		new_offset = offset_in_page(tmp);
 		done_copy_out = 1;
+
+		/*
+		 * This isn't strictly necessary, as we're using frozen
+		 * data for the escaping, but it keeps consistency with
+		 * b_frozen_data usage.
+		 */
+		jh_in->b_frozen_triggers = jh_in->b_triggers;
 	}
 
 	/*
@@ -631,6 +651,8 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
 		return NULL;
 
 	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+	if (!bh)
+		return NULL;
 	lock_buffer(bh);
 	memset(bh->b_data, 0, journal->j_blocksize);
 	set_buffer_uptodate(bh);
@@ -824,6 +846,8 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v)
 	    jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid));
 	seq_printf(seq, "  %ums logging transaction\n",
 	    jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid));
+	seq_printf(seq, "  %luus average transaction commit time\n",
+		   do_div(s->journal->j_average_commit_time, 1000));
 	seq_printf(seq, "  %lu handles per transaction\n",
 	    s->stats->u.run.rs_handle_count / s->stats->ts_tid);
 	seq_printf(seq, "  %lu blocks per transaction\n",
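A note on the new /proc line: do_div() (hence the asm/div64.h include added at the top of this file) divides its first argument in place, and the macro's value is the remainder. As written, the expression handed to seq_printf() therefore evaluates to j_average_commit_time % 1000 and also overwrites the stored average with the quotient; presumably the intent is the average in microseconds, which the quotient would give. The sketch below only demonstrates the do_div() contract on a copy and is not part of the patch:

    #include <linux/kernel.h>
    #include <linux/types.h>
    #include <asm/div64.h>

    /* Illustration of do_div() semantics only. */
    static void show_average_commit_time(u64 average_commit_time_ns)
    {
    	u64 avg = average_commit_time_ns;	/* work on a copy */
    	u32 rem;

    	/* do_div(n, base): n becomes n / base, the macro yields n % base */
    	rem = do_div(avg, 1000);
    	printk(KERN_INFO "average commit time: %llu us (+%u ns)\n",
    	       (unsigned long long)avg, rem);
    }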
@@ -961,6 +985,8 @@ static journal_t * journal_init_common (void)
 	spin_lock_init(&journal->j_state_lock);
 
 	journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
+	journal->j_min_batch_time = 0;
+	journal->j_max_batch_time = 15000; /* 15ms */
 
 	/* The journal is marked for error until we succeed with recovery! */
 	journal->j_flags = JBD2_ABORT;
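These defaults (no minimum wait, a 15ms cap, both in microseconds) are meant to be overridden by the client filesystem; ext4 gained min_batch_time/max_batch_time mount options around the same patch series. The helper below is a hedged sketch of that filesystem-side wiring, with made-up names; only j_min_batch_time and j_max_batch_time come from this patch:

    /*
     * Sketch: push mount-option values into the journal's batching knobs.
     * Taking j_state_lock mirrors how other journal parameters are updated;
     * treat the function and parameter names as assumptions.
     */
    static void init_journal_batch_params(journal_t *journal,
    					  unsigned int min_batch_time,
    					  unsigned int max_batch_time)
    {
    	spin_lock(&journal->j_state_lock);
    	journal->j_min_batch_time = min_batch_time;	/* e.g. 0 */
    	journal->j_max_batch_time = max_batch_time;	/* e.g. 15000 = 15ms */
    	spin_unlock(&journal->j_state_lock);
    }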
@@ -1016,15 +1042,14 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 
 	/* journal descriptor can store up to n blocks -bzzz */
 	journal->j_blocksize = blocksize;
+	jbd2_stats_proc_init(journal);
 	n = journal->j_blocksize / sizeof(journal_block_tag_t);
 	journal->j_wbufsize = n;
 	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
 	if (!journal->j_wbuf) {
 		printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
 			__func__);
-		kfree(journal);
-		journal = NULL;
-		goto out;
+		goto out_err;
 	}
 	journal->j_dev = bdev;
 	journal->j_fs_dev = fs_dev;
@@ -1034,14 +1059,22 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 	p = journal->j_devname;
 	while ((p = strchr(p, '/')))
 		*p = '!';
-	jbd2_stats_proc_init(journal);
 
 	bh = __getblk(journal->j_dev, start, journal->j_blocksize);
-	J_ASSERT(bh != NULL);
+	if (!bh) {
+		printk(KERN_ERR
+		       "%s: Cannot get buffer for journal superblock\n",
+		       __func__);
+		goto out_err;
+	}
 	journal->j_sb_buffer = bh;
 	journal->j_superblock = (journal_superblock_t *)bh->b_data;
-out:
+
 	return journal;
+out_err:
+	jbd2_stats_proc_exit(journal);
+	kfree(journal);
+	return NULL;
 }
 
 /**
@@ -1089,9 +1122,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
 	if (!journal->j_wbuf) {
 		printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
 			__func__);
-		jbd2_stats_proc_exit(journal);
-		kfree(journal);
-		return NULL;
+		goto out_err;
 	}
 
 	err = jbd2_journal_bmap(journal, 0, &blocknr);
@@ -1099,17 +1130,24 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
 	if (err) {
 		printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
 		       __func__);
-		jbd2_stats_proc_exit(journal);
-		kfree(journal);
-		return NULL;
+		goto out_err;
 	}
 
 	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
-	J_ASSERT(bh != NULL);
+	if (!bh) {
+		printk(KERN_ERR
+		       "%s: Cannot get buffer for journal superblock\n",
+		       __func__);
+		goto out_err;
+	}
 	journal->j_sb_buffer = bh;
 	journal->j_superblock = (journal_superblock_t *)bh->b_data;
 
 	return journal;
+out_err:
+	jbd2_stats_proc_exit(journal);
+	kfree(journal);
+	return NULL;
 }
 
 /*
@@ -1158,77 +1196,6 @@ static int journal_reset(journal_t *journal)
 }
 
 /**
- * int jbd2_journal_create() - Initialise the new journal file
- * @journal: Journal to create. This structure must have been initialised
- *
- * Given a journal_t structure which tells us which disk blocks we can
- * use, create a new journal superblock and initialise all of the
- * journal fields from scratch.
- **/
-int jbd2_journal_create(journal_t *journal)
-{
-	unsigned long long blocknr;
-	struct buffer_head *bh;
-	journal_superblock_t *sb;
-	int i, err;
-
-	if (journal->j_maxlen < JBD2_MIN_JOURNAL_BLOCKS) {
-		printk (KERN_ERR "Journal length (%d blocks) too short.\n",
-			journal->j_maxlen);
-		journal_fail_superblock(journal);
-		return -EINVAL;
-	}
-
-	if (journal->j_inode == NULL) {
-		/*
-		 * We don't know what block to start at!
-		 */
-		printk(KERN_EMERG
-		       "%s: creation of journal on external device!\n",
-		       __func__);
-		BUG();
-	}
-
-	/* Zero out the entire journal on disk.  We cannot afford to
-	   have any blocks on disk beginning with JBD2_MAGIC_NUMBER. */
-	jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
-	for (i = 0; i < journal->j_maxlen; i++) {
-		err = jbd2_journal_bmap(journal, i, &blocknr);
-		if (err)
-			return err;
-		bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
-		lock_buffer(bh);
-		memset (bh->b_data, 0, journal->j_blocksize);
-		BUFFER_TRACE(bh, "marking dirty");
-		mark_buffer_dirty(bh);
-		BUFFER_TRACE(bh, "marking uptodate");
-		set_buffer_uptodate(bh);
-		unlock_buffer(bh);
-		__brelse(bh);
-	}
-
-	sync_blockdev(journal->j_dev);
-	jbd_debug(1, "JBD: journal cleared.\n");
-
-	/* OK, fill in the initial static fields in the new superblock */
-	sb = journal->j_superblock;
-
-	sb->s_header.h_magic	 = cpu_to_be32(JBD2_MAGIC_NUMBER);
-	sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2);
-
-	sb->s_blocksize	= cpu_to_be32(journal->j_blocksize);
-	sb->s_maxlen	= cpu_to_be32(journal->j_maxlen);
-	sb->s_first	= cpu_to_be32(1);
-
-	journal->j_transaction_sequence = 1;
-
-	journal->j_flags &= ~JBD2_ABORT;
-	journal->j_format_version = 2;
-
-	return journal_reset(journal);
-}
-
-/**
  * void jbd2_journal_update_superblock() - Update journal sb on disk.
  * @journal: The journal to update.
  * @wait: Set to '0' if you don't want to wait for IO completion.
@@ -1472,7 +1439,9 @@ int jbd2_journal_destroy(journal_t *journal)
 	spin_lock(&journal->j_list_lock);
 	while (journal->j_checkpoint_transactions != NULL) {
 		spin_unlock(&journal->j_list_lock);
+		mutex_lock(&journal->j_checkpoint_mutex);
 		jbd2_log_do_checkpoint(journal);
+		mutex_unlock(&journal->j_checkpoint_mutex);
 		spin_lock(&journal->j_list_lock);
 	}
 
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 39b7805a599a..46b4e347ed7d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -25,6 +25,7 @@
 #include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/hrtimer.h>
 
 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
 
@@ -48,6 +49,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
 {
 	transaction->t_journal = journal;
 	transaction->t_state = T_RUNNING;
+	transaction->t_start_time = ktime_get();
 	transaction->t_tid = journal->j_transaction_sequence++;
 	transaction->t_expires = jiffies + journal->j_commit_interval;
 	spin_lock_init(&transaction->t_handle_lock);
@@ -741,6 +743,12 @@ done:
 		source = kmap_atomic(page, KM_USER0);
 		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
 		kunmap_atomic(source, KM_USER0);
+
+		/*
+		 * Now that the frozen data is saved off, we need to store
+		 * any matching triggers.
+		 */
+		jh->b_frozen_triggers = jh->b_triggers;
 	}
 	jbd_unlock_bh_state(bh);
 
@@ -944,6 +952,47 @@ out:
 }
 
 /**
+ * void jbd2_journal_set_triggers() - Add triggers for commit writeout
+ * @bh: buffer to trigger on
+ * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
+ *
+ * Set any triggers on this journal_head.  This is always safe, because
+ * triggers for a committing buffer will be saved off, and triggers for
+ * a running transaction will match the buffer in that transaction.
+ *
+ * Call with NULL to clear the triggers.
+ */
+void jbd2_journal_set_triggers(struct buffer_head *bh,
+			       struct jbd2_buffer_trigger_type *type)
+{
+	struct journal_head *jh = bh2jh(bh);
+
+	jh->b_triggers = type;
+}
+
+void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
+				struct jbd2_buffer_trigger_type *triggers)
+{
+	struct buffer_head *bh = jh2bh(jh);
+
+	if (!triggers || !triggers->t_commit)
+		return;
+
+	triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
+}
+
+void jbd2_buffer_abort_trigger(struct journal_head *jh,
+			       struct jbd2_buffer_trigger_type *triggers)
+{
+	if (!triggers || !triggers->t_abort)
+		return;
+
+	triggers->t_abort(triggers, jh2bh(jh));
+}
+
+
+
+/**
  * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
  * @bh: buffer to mark
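The trigger hooks added above let a client filesystem see (and modify) a metadata buffer's frozen image just before it is written to the journal, and be notified if the journal aborts instead; as I recall, OCFS2's metadata checksumming was the original consumer. The struct layout below is inferred from the t_commit/t_abort call sites in the hunk above, and the filesystem-side code is an illustrative sketch with made-up names (my_block, my_commit_trigger, and so on), not the exact header or any real filesystem:

    #include <linux/kernel.h>
    #include <linux/types.h>
    #include <linux/buffer_head.h>
    #include <linux/crc32.h>

    /* Inferred from triggers->t_commit(triggers, bh, mapped_data, bh->b_size)
     * and triggers->t_abort(triggers, jh2bh(jh)) above. */
    struct jbd2_buffer_trigger_type {
    	void (*t_commit)(struct jbd2_buffer_trigger_type *type,
    			 struct buffer_head *bh, void *mapped_data,
    			 size_t size);
    	void (*t_abort)(struct jbd2_buffer_trigger_type *type,
    			struct buffer_head *bh);
    };

    struct my_block {			/* made-up on-disk structure */
    	__le32 my_block_csum;
    	/* ... real payload would follow ... */
    };

    /* Recompute an on-disk checksum in the journalled image right before it
     * goes to the log; mapped_data is the frozen copy, so the live buffer
     * is left untouched. */
    static void my_commit_trigger(struct jbd2_buffer_trigger_type *type,
    			      struct buffer_head *bh, void *mapped_data,
    			      size_t size)
    {
    	struct my_block *blk = mapped_data;

    	blk->my_block_csum = cpu_to_le32(crc32_le(~0, mapped_data, size));
    }

    static void my_abort_trigger(struct jbd2_buffer_trigger_type *type,
    			     struct buffer_head *bh)
    {
    	/* journal aborted: nothing to undo for a checksum, just report it */
    	printk(KERN_ERR "my_fs: abort trigger on block %llu\n",
    	       (unsigned long long)bh->b_blocknr);
    }

    static struct jbd2_buffer_trigger_type my_triggers = {
    	.t_commit = my_commit_trigger,
    	.t_abort  = my_abort_trigger,
    };

    /* After jbd2_journal_get_write_access(handle, bh) succeeds:
     *	jbd2_journal_set_triggers(bh, &my_triggers);
     */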
@@ -1193,7 +1242,7 @@ int jbd2_journal_stop(handle_t *handle)
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
-	int old_handle_count, err;
+	int err;
 	pid_t pid;
 
 	J_ASSERT(journal_current_handle() == handle);
@@ -1216,24 +1265,54 @@ int jbd2_journal_stop(handle_t *handle)
 	/*
 	 * Implement synchronous transaction batching.  If the handle
 	 * was synchronous, don't force a commit immediately.  Let's
-	 * yield and let another thread piggyback onto this transaction.
-	 * Keep doing that while new threads continue to arrive.
-	 * It doesn't cost much - we're about to run a commit and sleep
-	 * on IO anyway.  Speeds up many-threaded, many-dir operations
-	 * by 30x or more...
+	 * yield and let another thread piggyback onto this
+	 * transaction.  Keep doing that while new threads continue to
+	 * arrive.  It doesn't cost much - we're about to run a commit
+	 * and sleep on IO anyway.  Speeds up many-threaded, many-dir
+	 * operations by 30x or more...
+	 *
+	 * We try and optimize the sleep time against what the
+	 * underlying disk can do, instead of having a static sleep
+	 * time.  This is useful for the case where our storage is so
+	 * fast that it is more optimal to go ahead and force a flush
+	 * and wait for the transaction to be committed than it is to
+	 * wait for an arbitrary amount of time for new writers to
+	 * join the transaction.  We achieve this by measuring how
+	 * long it takes to commit a transaction, and compare it with
+	 * how long this transaction has been running, and if run time
+	 * < commit time then we sleep for the delta and commit.  This
+	 * greatly helps super fast disks that would see slowdowns as
+	 * more threads started doing fsyncs.
 	 *
-	 * But don't do this if this process was the most recent one to
-	 * perform a synchronous write.  We do this to detect the case where a
-	 * single process is doing a stream of sync writes.  No point in waiting
-	 * for joiners in that case.
+	 * But don't do this if this process was the most recent one
+	 * to perform a synchronous write.  We do this to detect the
+	 * case where a single process is doing a stream of sync
+	 * writes.  No point in waiting for joiners in that case.
 	 */
 	pid = current->pid;
 	if (handle->h_sync && journal->j_last_sync_writer != pid) {
+		u64 commit_time, trans_time;
+
 		journal->j_last_sync_writer = pid;
-		do {
-			old_handle_count = transaction->t_handle_count;
-			schedule_timeout_uninterruptible(1);
-		} while (old_handle_count != transaction->t_handle_count);
+
+		spin_lock(&journal->j_state_lock);
+		commit_time = journal->j_average_commit_time;
+		spin_unlock(&journal->j_state_lock);
+
+		trans_time = ktime_to_ns(ktime_sub(ktime_get(),
+						   transaction->t_start_time));
+
+		commit_time = max_t(u64, commit_time,
+				    1000*journal->j_min_batch_time);
+		commit_time = min_t(u64, commit_time,
+				    1000*journal->j_max_batch_time);
+
+		if (trans_time < commit_time) {
+			ktime_t expires = ktime_add_ns(ktime_get(),
+						       commit_time);
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
+		}
 	}
 
 	current->journal_info = NULL;
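Pulling the pieces together: a handle that did a synchronous write now sleeps only if the transaction has been open for less time than an average commit takes, and it sleeps on an hrtimer for the clamped average itself rather than polling a jiffy at a time as the removed do/while loop did. A small user-space sketch of that decision with made-up numbers (the real code keeps times in nanoseconds, with j_min_batch_time/j_max_batch_time in microseconds):

    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_USEC 1000ULL

    /* Mirror of the decision in jbd2_journal_stop() above; illustration only. */
    static uint64_t batching_sleep_ns(uint64_t avg_commit_ns,
    				  uint64_t trans_running_ns,
    				  uint64_t min_batch_us, uint64_t max_batch_us)
    {
    	uint64_t commit_time = avg_commit_ns;

    	/* clamp the estimate between the min and max batch times */
    	if (commit_time < min_batch_us * NSEC_PER_USEC)
    		commit_time = min_batch_us * NSEC_PER_USEC;
    	if (commit_time > max_batch_us * NSEC_PER_USEC)
    		commit_time = max_batch_us * NSEC_PER_USEC;

    	/* only wait for piggybackers if a commit costs more than the time
    	 * this transaction has already been running */
    	return trans_running_ns < commit_time ? commit_time : 0;
    }

    int main(void)
    {
    	/* fast device: 400us average commit, transaction open for 100us
    	 * -> sleep ~400us instead of a fixed 1-jiffy (1-10ms) poll */
    	printf("fast disk: sleep %llu ns\n", (unsigned long long)
    	       batching_sleep_ns(400000, 100000, 0, 15000));

    	/* device slower than the 15ms cap -> sleep is clamped to 15ms */
    	printf("slow disk: sleep %llu ns\n", (unsigned long long)
    	       batching_sleep_ns(40000000, 100000, 0, 15000));
    	return 0;
    }

Using hrtimers here is the point of the hrtimer.h include at the top of the file: the wait can be sized in microseconds to match fast storage, where a whole-jiffy sleep would dominate the cost of simply committing.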