about | summary | refs | log | tree | commit | diff | stats
path: root/fs/jbd2/commit.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd2/commit.c')
-rw-r--r-- fs/jbd2/commit.c 255
1 files changed, 202 insertions, 53 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6986f334c643..da8d0eb3b7b9 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -20,6 +20,8 @@
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/jiffies.h>
24#include <linux/crc32.h>
23 25
24/* 26/*
25 * Default IO end handler for temporary BJ_IO buffer_heads. 27 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -92,19 +94,23 @@ static int inverted_lock(journal_t *journal, struct buffer_head *bh)
92 return 1; 94 return 1;
93} 95}
94 96
95/* Done it all: now write the commit record. We should have 97/*
98 * Done it all: now submit the commit record. We should have
96 * cleaned up our previous buffers by now, so if we are in abort 99 * cleaned up our previous buffers by now, so if we are in abort
97 * mode we can now just skip the rest of the journal write 100 * mode we can now just skip the rest of the journal write
98 * entirely. 101 * entirely.
99 * 102 *
100 * Returns 1 if the journal needs to be aborted or 0 on success 103 * Returns 1 if the journal needs to be aborted or 0 on success
101 */ 104 */
102static int journal_write_commit_record(journal_t *journal, 105static int journal_submit_commit_record(journal_t *journal,
103 transaction_t *commit_transaction) 106 transaction_t *commit_transaction,
107 struct buffer_head **cbh,
108 __u32 crc32_sum)
104{ 109{
105 struct journal_head *descriptor; 110 struct journal_head *descriptor;
111 struct commit_header *tmp;
106 struct buffer_head *bh; 112 struct buffer_head *bh;
107 int i, ret; 113 int ret;
108 int barrier_done = 0; 114 int barrier_done = 0;
109 115
110 if (is_journal_aborted(journal)) 116 if (is_journal_aborted(journal))
@@ -116,21 +122,33 @@ static int journal_write_commit_record(journal_t *journal,
116 122
117 bh = jh2bh(descriptor); 123 bh = jh2bh(descriptor);
118 124
119 /* AKPM: buglet - add `i' to tmp! */ 125 tmp = (struct commit_header *)bh->b_data;
120 for (i = 0; i < bh->b_size; i += 512) { 126 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
121 journal_header_t *tmp = (journal_header_t*)bh->b_data; 127 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
122 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); 128 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
123 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); 129
124 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); 130 if (JBD2_HAS_COMPAT_FEATURE(journal,
131 JBD2_FEATURE_COMPAT_CHECKSUM)) {
132 tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
133 tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
134 tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
125 } 135 }
126 136
127 JBUFFER_TRACE(descriptor, "write commit block"); 137 JBUFFER_TRACE(descriptor, "submit commit block");
138 lock_buffer(bh);
139
128 set_buffer_dirty(bh); 140 set_buffer_dirty(bh);
129 if (journal->j_flags & JBD2_BARRIER) { 141 set_buffer_uptodate(bh);
142 bh->b_end_io = journal_end_buffer_io_sync;
143
144 if (journal->j_flags & JBD2_BARRIER &&
145 !JBD2_HAS_COMPAT_FEATURE(journal,
146 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
130 set_buffer_ordered(bh); 147 set_buffer_ordered(bh);
131 barrier_done = 1; 148 barrier_done = 1;
132 } 149 }
133 ret = sync_dirty_buffer(bh); 150 ret = submit_bh(WRITE, bh);
151
134 /* is it possible for another commit to fail at roughly 152 /* is it possible for another commit to fail at roughly
135 * the same time as this one? If so, we don't want to 153 * the same time as this one? If so, we don't want to
136 * trust the barrier flag in the super, but instead want 154 * trust the barrier flag in the super, but instead want
@@ -151,14 +169,72 @@ static int journal_write_commit_record(journal_t *journal,
151 clear_buffer_ordered(bh); 169 clear_buffer_ordered(bh);
152 set_buffer_uptodate(bh); 170 set_buffer_uptodate(bh);
153 set_buffer_dirty(bh); 171 set_buffer_dirty(bh);
154 ret = sync_dirty_buffer(bh); 172 ret = submit_bh(WRITE, bh);
155 } 173 }
156 put_bh(bh); /* One for getblk() */ 174 *cbh = bh;
157 jbd2_journal_put_journal_head(descriptor); 175 return ret;
176}
177
178/*
179 * This function along with journal_submit_commit_record
180 * allows to write the commit record asynchronously.
181 */
182static int journal_wait_on_commit_record(struct buffer_head *bh)
183{
184 int ret = 0;
185
186 clear_buffer_dirty(bh);
187 wait_on_buffer(bh);
188
189 if (unlikely(!buffer_uptodate(bh)))
190 ret = -EIO;
191 put_bh(bh); /* One for getblk() */
192 jbd2_journal_put_journal_head(bh2jh(bh));
158 193
159 return (ret == -EIO); 194 return ret;
160} 195}
161 196
197/*
198 * Wait for all submitted IO to complete.
199 */
200static int journal_wait_on_locked_list(journal_t *journal,
201 transaction_t *commit_transaction)
202{
203 int ret = 0;
204 struct journal_head *jh;
205
206 while (commit_transaction->t_locked_list) {
207 struct buffer_head *bh;
208
209 jh = commit_transaction->t_locked_list->b_tprev;
210 bh = jh2bh(jh);
211 get_bh(bh);
212 if (buffer_locked(bh)) {
213 spin_unlock(&journal->j_list_lock);
214 wait_on_buffer(bh);
215 if (unlikely(!buffer_uptodate(bh)))
216 ret = -EIO;
217 spin_lock(&journal->j_list_lock);
218 }
219 if (!inverted_lock(journal, bh)) {
220 put_bh(bh);
221 spin_lock(&journal->j_list_lock);
222 continue;
223 }
224 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
225 __jbd2_journal_unfile_buffer(jh);
226 jbd_unlock_bh_state(bh);
227 jbd2_journal_remove_journal_head(bh);
228 put_bh(bh);
229 } else {
230 jbd_unlock_bh_state(bh);
231 }
232 put_bh(bh);
233 cond_resched_lock(&journal->j_list_lock);
234 }
235 return ret;
236 }
237
162static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) 238static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
163{ 239{
164 int i; 240 int i;
@@ -274,7 +350,21 @@ write_out_data:
274 journal_do_submit_data(wbuf, bufs); 350 journal_do_submit_data(wbuf, bufs);
275} 351}
276 352
277static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, 353static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
354{
355 struct page *page = bh->b_page;
356 char *addr;
357 __u32 checksum;
358
359 addr = kmap_atomic(page, KM_USER0);
360 checksum = crc32_be(crc32_sum,
361 (void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
362 kunmap_atomic(addr, KM_USER0);
363
364 return checksum;
365}
366
367static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
278 unsigned long long block) 368 unsigned long long block)
279{ 369{
280 tag->t_blocknr = cpu_to_be32(block & (u32)~0); 370 tag->t_blocknr = cpu_to_be32(block & (u32)~0);
@@ -290,6 +380,7 @@ static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
290 */ 380 */
291void jbd2_journal_commit_transaction(journal_t *journal) 381void jbd2_journal_commit_transaction(journal_t *journal)
292{ 382{
383 struct transaction_stats_s stats;
293 transaction_t *commit_transaction; 384 transaction_t *commit_transaction;
294 struct journal_head *jh, *new_jh, *descriptor; 385 struct journal_head *jh, *new_jh, *descriptor;
295 struct buffer_head **wbuf = journal->j_wbuf; 386 struct buffer_head **wbuf = journal->j_wbuf;
@@ -305,6 +396,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
305 int tag_flag; 396 int tag_flag;
306 int i; 397 int i;
307 int tag_bytes = journal_tag_bytes(journal); 398 int tag_bytes = journal_tag_bytes(journal);
399 struct buffer_head *cbh = NULL; /* For transactional checksums */
400 __u32 crc32_sum = ~0;
308 401
309 /* 402 /*
310 * First job: lock down the current transaction and wait for 403 * First job: lock down the current transaction and wait for
@@ -337,6 +430,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
337 spin_lock(&journal->j_state_lock); 430 spin_lock(&journal->j_state_lock);
338 commit_transaction->t_state = T_LOCKED; 431 commit_transaction->t_state = T_LOCKED;
339 432
433 stats.u.run.rs_wait = commit_transaction->t_max_wait;
434 stats.u.run.rs_locked = jiffies;
435 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
436 stats.u.run.rs_locked);
437
340 spin_lock(&commit_transaction->t_handle_lock); 438 spin_lock(&commit_transaction->t_handle_lock);
341 while (commit_transaction->t_updates) { 439 while (commit_transaction->t_updates) {
342 DEFINE_WAIT(wait); 440 DEFINE_WAIT(wait);
@@ -407,6 +505,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
407 */ 505 */
408 jbd2_journal_switch_revoke_table(journal); 506 jbd2_journal_switch_revoke_table(journal);
409 507
508 stats.u.run.rs_flushing = jiffies;
509 stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
510 stats.u.run.rs_flushing);
511
410 commit_transaction->t_state = T_FLUSH; 512 commit_transaction->t_state = T_FLUSH;
411 journal->j_committing_transaction = commit_transaction; 513 journal->j_committing_transaction = commit_transaction;
412 journal->j_running_transaction = NULL; 514 journal->j_running_transaction = NULL;
@@ -440,38 +542,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
440 journal_submit_data_buffers(journal, commit_transaction); 542 journal_submit_data_buffers(journal, commit_transaction);
441 543
442 /* 544 /*
443 * Wait for all previously submitted IO to complete. 545 * Wait for all previously submitted IO to complete if commit
546 * record is to be written synchronously.
444 */ 547 */
445 spin_lock(&journal->j_list_lock); 548 spin_lock(&journal->j_list_lock);
446 while (commit_transaction->t_locked_list) { 549 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
447 struct buffer_head *bh; 550 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
551 err = journal_wait_on_locked_list(journal,
552 commit_transaction);
448 553
449 jh = commit_transaction->t_locked_list->b_tprev;
450 bh = jh2bh(jh);
451 get_bh(bh);
452 if (buffer_locked(bh)) {
453 spin_unlock(&journal->j_list_lock);
454 wait_on_buffer(bh);
455 if (unlikely(!buffer_uptodate(bh)))
456 err = -EIO;
457 spin_lock(&journal->j_list_lock);
458 }
459 if (!inverted_lock(journal, bh)) {
460 put_bh(bh);
461 spin_lock(&journal->j_list_lock);
462 continue;
463 }
464 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
465 __jbd2_journal_unfile_buffer(jh);
466 jbd_unlock_bh_state(bh);
467 jbd2_journal_remove_journal_head(bh);
468 put_bh(bh);
469 } else {
470 jbd_unlock_bh_state(bh);
471 }
472 put_bh(bh);
473 cond_resched_lock(&journal->j_list_lock);
474 }
475 spin_unlock(&journal->j_list_lock); 554 spin_unlock(&journal->j_list_lock);
476 555
477 if (err) 556 if (err)
@@ -498,6 +577,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
498 */ 577 */
499 commit_transaction->t_state = T_COMMIT; 578 commit_transaction->t_state = T_COMMIT;
500 579
580 stats.u.run.rs_logging = jiffies;
581 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
582 stats.u.run.rs_logging);
583 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
584 stats.u.run.rs_blocks_logged = 0;
585
501 descriptor = NULL; 586 descriptor = NULL;
502 bufs = 0; 587 bufs = 0;
503 while (commit_transaction->t_buffers) { 588 while (commit_transaction->t_buffers) {
@@ -639,6 +724,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
639start_journal_io: 724start_journal_io:
640 for (i = 0; i < bufs; i++) { 725 for (i = 0; i < bufs; i++) {
641 struct buffer_head *bh = wbuf[i]; 726 struct buffer_head *bh = wbuf[i];
727 /*
728 * Compute checksum.
729 */
730 if (JBD2_HAS_COMPAT_FEATURE(journal,
731 JBD2_FEATURE_COMPAT_CHECKSUM)) {
732 crc32_sum =
733 jbd2_checksum_data(crc32_sum, bh);
734 }
735
642 lock_buffer(bh); 736 lock_buffer(bh);
643 clear_buffer_dirty(bh); 737 clear_buffer_dirty(bh);
644 set_buffer_uptodate(bh); 738 set_buffer_uptodate(bh);
@@ -646,6 +740,7 @@ start_journal_io:
646 submit_bh(WRITE, bh); 740 submit_bh(WRITE, bh);
647 } 741 }
648 cond_resched(); 742 cond_resched();
743 stats.u.run.rs_blocks_logged += bufs;
649 744
650 /* Force a new descriptor to be generated next 745 /* Force a new descriptor to be generated next
651 time round the loop. */ 746 time round the loop. */
@@ -654,6 +749,23 @@ start_journal_io:
654 } 749 }
655 } 750 }
656 751
752 /* Done it all: now write the commit record asynchronously. */
753
754 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
755 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
756 err = journal_submit_commit_record(journal, commit_transaction,
757 &cbh, crc32_sum);
758 if (err)
759 __jbd2_journal_abort_hard(journal);
760
761 spin_lock(&journal->j_list_lock);
762 err = journal_wait_on_locked_list(journal,
763 commit_transaction);
764 spin_unlock(&journal->j_list_lock);
765 if (err)
766 __jbd2_journal_abort_hard(journal);
767 }
768
657 /* Lo and behold: we have just managed to send a transaction to 769 /* Lo and behold: we have just managed to send a transaction to
658 the log. Before we can commit it, wait for the IO so far to 770 the log. Before we can commit it, wait for the IO so far to
659 complete. Control buffers being written are on the 771 complete. Control buffers being written are on the
@@ -753,8 +865,14 @@ wait_for_iobuf:
753 865
754 jbd_debug(3, "JBD: commit phase 6\n"); 866 jbd_debug(3, "JBD: commit phase 6\n");
755 867
756 if (journal_write_commit_record(journal, commit_transaction)) 868 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
757 err = -EIO; 869 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
870 err = journal_submit_commit_record(journal, commit_transaction,
871 &cbh, crc32_sum);
872 if (err)
873 __jbd2_journal_abort_hard(journal);
874 }
875 err = journal_wait_on_commit_record(cbh);
758 876
759 if (err) 877 if (err)
760 jbd2_journal_abort(journal, err); 878 jbd2_journal_abort(journal, err);
@@ -816,6 +934,7 @@ restart_loop:
816 cp_transaction = jh->b_cp_transaction; 934 cp_transaction = jh->b_cp_transaction;
817 if (cp_transaction) { 935 if (cp_transaction) {
818 JBUFFER_TRACE(jh, "remove from old cp transaction"); 936 JBUFFER_TRACE(jh, "remove from old cp transaction");
937 cp_transaction->t_chp_stats.cs_dropped++;
819 __jbd2_journal_remove_checkpoint(jh); 938 __jbd2_journal_remove_checkpoint(jh);
820 } 939 }
821 940
@@ -867,10 +986,10 @@ restart_loop:
867 } 986 }
868 spin_unlock(&journal->j_list_lock); 987 spin_unlock(&journal->j_list_lock);
869 /* 988 /*
870 * This is a bit sleazy. We borrow j_list_lock to protect 989 * This is a bit sleazy. We use j_list_lock to protect transition
871 * journal->j_committing_transaction in __jbd2_journal_remove_checkpoint. 990 * of a transaction into T_FINISHED state and calling
872 * Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but 991 * __jbd2_journal_drop_transaction(). Otherwise we could race with
873 * it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint 992 * other checkpointing code processing the transaction...
874 */ 993 */
875 spin_lock(&journal->j_state_lock); 994 spin_lock(&journal->j_state_lock);
876 spin_lock(&journal->j_list_lock); 995 spin_lock(&journal->j_list_lock);
@@ -890,6 +1009,36 @@ restart_loop:
890 1009
891 J_ASSERT(commit_transaction->t_state == T_COMMIT); 1010 J_ASSERT(commit_transaction->t_state == T_COMMIT);
892 1011
1012 commit_transaction->t_start = jiffies;
1013 stats.u.run.rs_logging = jbd2_time_diff(stats.u.run.rs_logging,
1014 commit_transaction->t_start);
1015
1016 /*
1017 * File the transaction for history
1018 */
1019 stats.ts_type = JBD2_STATS_RUN;
1020 stats.ts_tid = commit_transaction->t_tid;
1021 stats.u.run.rs_handle_count = commit_transaction->t_handle_count;
1022 spin_lock(&journal->j_history_lock);
1023 memcpy(journal->j_history + journal->j_history_cur, &stats,
1024 sizeof(stats));
1025 if (++journal->j_history_cur == journal->j_history_max)
1026 journal->j_history_cur = 0;
1027
1028 /*
1029 * Calculate overall stats
1030 */
1031 journal->j_stats.ts_tid++;
1032 journal->j_stats.u.run.rs_wait += stats.u.run.rs_wait;
1033 journal->j_stats.u.run.rs_running += stats.u.run.rs_running;
1034 journal->j_stats.u.run.rs_locked += stats.u.run.rs_locked;
1035 journal->j_stats.u.run.rs_flushing += stats.u.run.rs_flushing;
1036 journal->j_stats.u.run.rs_logging += stats.u.run.rs_logging;
1037 journal->j_stats.u.run.rs_handle_count += stats.u.run.rs_handle_count;
1038 journal->j_stats.u.run.rs_blocks += stats.u.run.rs_blocks;
1039 journal->j_stats.u.run.rs_blocks_logged += stats.u.run.rs_blocks_logged;
1040 spin_unlock(&journal->j_history_lock);
1041
893 commit_transaction->t_state = T_FINISHED; 1042 commit_transaction->t_state = T_FINISHED;
894 J_ASSERT(commit_transaction == journal->j_committing_transaction); 1043 J_ASSERT(commit_transaction == journal->j_committing_transaction);
895 journal->j_commit_sequence = commit_transaction->t_tid; 1044 journal->j_commit_sequence = commit_transaction->t_tid;