aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2/commit.c
diff options
context:
space:
mode:
authorGirish Shilamkar <girish@clusterfs.com>2008-01-28 23:58:27 -0500
committerTheodore Ts'o <tytso@mit.edu>2008-01-28 23:58:27 -0500
commit818d276ceb83aa9fdebb5e0a53188290312de987 (patch)
treede3fb4ffadd72caea2876c5232ce76cd14b3646e /fs/jbd2/commit.c
parent8e85fb3f305b24b79c6d9cb7a56d22b062335ad3 (diff)
ext4: Add the journal checksum feature
The journal checksum feature adds two new flags, i.e. JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT and JBD2_FEATURE_COMPAT_CHECKSUM. The JBD2_FEATURE_COMPAT_CHECKSUM flag indicates that the commit block contains the checksum for the blocks described by the descriptor blocks. Because of the checksums, writing the commit record no longer needs to be synchronous: the commit record can now be sent to disk without waiting for the descriptor blocks to be written to disk. This behavior is controlled by the JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT flag. Older kernels/e2fsck must not attempt to recover a journal that uses _ASYNC_COMMIT, hence the flag is made incompat. The commit header has been extended to hold the checksum along with the type of the checksum. During the scan pass of recovery, checksums are verified to ensure the sanity and completeness (in the case of _ASYNC_COMMIT) of every transaction. Signed-off-by: Andreas Dilger <adilger@clusterfs.com> Signed-off-by: Girish Shilamkar <girish@clusterfs.com> Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Diffstat (limited to 'fs/jbd2/commit.c')
-rw-r--r--fs/jbd2/commit.c198
1 files changed, 149 insertions, 49 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 8749a86f4175..da8d0eb3b7b9 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -21,6 +21,7 @@
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/jiffies.h> 23#include <linux/jiffies.h>
24#include <linux/crc32.h>
24 25
25/* 26/*
26 * Default IO end handler for temporary BJ_IO buffer_heads. 27 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -93,19 +94,23 @@ static int inverted_lock(journal_t *journal, struct buffer_head *bh)
93 return 1; 94 return 1;
94} 95}
95 96
96/* Done it all: now write the commit record. We should have 97/*
98 * Done it all: now submit the commit record. We should have
97 * cleaned up our previous buffers by now, so if we are in abort 99 * cleaned up our previous buffers by now, so if we are in abort
98 * mode we can now just skip the rest of the journal write 100 * mode we can now just skip the rest of the journal write
99 * entirely. 101 * entirely.
100 * 102 *
101 * Returns 1 if the journal needs to be aborted or 0 on success 103 * Returns 1 if the journal needs to be aborted or 0 on success
102 */ 104 */
103static int journal_write_commit_record(journal_t *journal, 105static int journal_submit_commit_record(journal_t *journal,
104 transaction_t *commit_transaction) 106 transaction_t *commit_transaction,
107 struct buffer_head **cbh,
108 __u32 crc32_sum)
105{ 109{
106 struct journal_head *descriptor; 110 struct journal_head *descriptor;
111 struct commit_header *tmp;
107 struct buffer_head *bh; 112 struct buffer_head *bh;
108 int i, ret; 113 int ret;
109 int barrier_done = 0; 114 int barrier_done = 0;
110 115
111 if (is_journal_aborted(journal)) 116 if (is_journal_aborted(journal))
@@ -117,21 +122,33 @@ static int journal_write_commit_record(journal_t *journal,
117 122
118 bh = jh2bh(descriptor); 123 bh = jh2bh(descriptor);
119 124
120 /* AKPM: buglet - add `i' to tmp! */ 125 tmp = (struct commit_header *)bh->b_data;
121 for (i = 0; i < bh->b_size; i += 512) { 126 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
122 journal_header_t *tmp = (journal_header_t*)bh->b_data; 127 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
123 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); 128 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
124 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); 129
125 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); 130 if (JBD2_HAS_COMPAT_FEATURE(journal,
131 JBD2_FEATURE_COMPAT_CHECKSUM)) {
132 tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
133 tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
134 tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
126 } 135 }
127 136
128 JBUFFER_TRACE(descriptor, "write commit block"); 137 JBUFFER_TRACE(descriptor, "submit commit block");
138 lock_buffer(bh);
139
129 set_buffer_dirty(bh); 140 set_buffer_dirty(bh);
130 if (journal->j_flags & JBD2_BARRIER) { 141 set_buffer_uptodate(bh);
142 bh->b_end_io = journal_end_buffer_io_sync;
143
144 if (journal->j_flags & JBD2_BARRIER &&
145 !JBD2_HAS_COMPAT_FEATURE(journal,
146 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
131 set_buffer_ordered(bh); 147 set_buffer_ordered(bh);
132 barrier_done = 1; 148 barrier_done = 1;
133 } 149 }
134 ret = sync_dirty_buffer(bh); 150 ret = submit_bh(WRITE, bh);
151
135 /* is it possible for another commit to fail at roughly 152 /* is it possible for another commit to fail at roughly
136 * the same time as this one? If so, we don't want to 153 * the same time as this one? If so, we don't want to
137 * trust the barrier flag in the super, but instead want 154 * trust the barrier flag in the super, but instead want
@@ -152,14 +169,72 @@ static int journal_write_commit_record(journal_t *journal,
152 clear_buffer_ordered(bh); 169 clear_buffer_ordered(bh);
153 set_buffer_uptodate(bh); 170 set_buffer_uptodate(bh);
154 set_buffer_dirty(bh); 171 set_buffer_dirty(bh);
155 ret = sync_dirty_buffer(bh); 172 ret = submit_bh(WRITE, bh);
156 } 173 }
157 put_bh(bh); /* One for getblk() */ 174 *cbh = bh;
158 jbd2_journal_put_journal_head(descriptor); 175 return ret;
176}
177
178/*
179 * This function along with journal_submit_commit_record
180 * allows to write the commit record asynchronously.
181 */
182static int journal_wait_on_commit_record(struct buffer_head *bh)
183{
184 int ret = 0;
185
186 clear_buffer_dirty(bh);
187 wait_on_buffer(bh);
159 188
160 return (ret == -EIO); 189 if (unlikely(!buffer_uptodate(bh)))
190 ret = -EIO;
191 put_bh(bh); /* One for getblk() */
192 jbd2_journal_put_journal_head(bh2jh(bh));
193
194 return ret;
161} 195}
162 196
197/*
198 * Wait for all submitted IO to complete.
199 */
200static int journal_wait_on_locked_list(journal_t *journal,
201 transaction_t *commit_transaction)
202{
203 int ret = 0;
204 struct journal_head *jh;
205
206 while (commit_transaction->t_locked_list) {
207 struct buffer_head *bh;
208
209 jh = commit_transaction->t_locked_list->b_tprev;
210 bh = jh2bh(jh);
211 get_bh(bh);
212 if (buffer_locked(bh)) {
213 spin_unlock(&journal->j_list_lock);
214 wait_on_buffer(bh);
215 if (unlikely(!buffer_uptodate(bh)))
216 ret = -EIO;
217 spin_lock(&journal->j_list_lock);
218 }
219 if (!inverted_lock(journal, bh)) {
220 put_bh(bh);
221 spin_lock(&journal->j_list_lock);
222 continue;
223 }
224 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
225 __jbd2_journal_unfile_buffer(jh);
226 jbd_unlock_bh_state(bh);
227 jbd2_journal_remove_journal_head(bh);
228 put_bh(bh);
229 } else {
230 jbd_unlock_bh_state(bh);
231 }
232 put_bh(bh);
233 cond_resched_lock(&journal->j_list_lock);
234 }
235 return ret;
236 }
237
163static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) 238static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
164{ 239{
165 int i; 240 int i;
@@ -275,7 +350,21 @@ write_out_data:
275 journal_do_submit_data(wbuf, bufs); 350 journal_do_submit_data(wbuf, bufs);
276} 351}
277 352
278static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, 353static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
354{
355 struct page *page = bh->b_page;
356 char *addr;
357 __u32 checksum;
358
359 addr = kmap_atomic(page, KM_USER0);
360 checksum = crc32_be(crc32_sum,
361 (void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
362 kunmap_atomic(addr, KM_USER0);
363
364 return checksum;
365}
366
367static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
279 unsigned long long block) 368 unsigned long long block)
280{ 369{
281 tag->t_blocknr = cpu_to_be32(block & (u32)~0); 370 tag->t_blocknr = cpu_to_be32(block & (u32)~0);
@@ -307,6 +396,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
307 int tag_flag; 396 int tag_flag;
308 int i; 397 int i;
309 int tag_bytes = journal_tag_bytes(journal); 398 int tag_bytes = journal_tag_bytes(journal);
399 struct buffer_head *cbh = NULL; /* For transactional checksums */
400 __u32 crc32_sum = ~0;
310 401
311 /* 402 /*
312 * First job: lock down the current transaction and wait for 403 * First job: lock down the current transaction and wait for
@@ -451,38 +542,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
451 journal_submit_data_buffers(journal, commit_transaction); 542 journal_submit_data_buffers(journal, commit_transaction);
452 543
453 /* 544 /*
454 * Wait for all previously submitted IO to complete. 545 * Wait for all previously submitted IO to complete if commit
546 * record is to be written synchronously.
455 */ 547 */
456 spin_lock(&journal->j_list_lock); 548 spin_lock(&journal->j_list_lock);
457 while (commit_transaction->t_locked_list) { 549 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
458 struct buffer_head *bh; 550 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
551 err = journal_wait_on_locked_list(journal,
552 commit_transaction);
459 553
460 jh = commit_transaction->t_locked_list->b_tprev;
461 bh = jh2bh(jh);
462 get_bh(bh);
463 if (buffer_locked(bh)) {
464 spin_unlock(&journal->j_list_lock);
465 wait_on_buffer(bh);
466 if (unlikely(!buffer_uptodate(bh)))
467 err = -EIO;
468 spin_lock(&journal->j_list_lock);
469 }
470 if (!inverted_lock(journal, bh)) {
471 put_bh(bh);
472 spin_lock(&journal->j_list_lock);
473 continue;
474 }
475 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
476 __jbd2_journal_unfile_buffer(jh);
477 jbd_unlock_bh_state(bh);
478 jbd2_journal_remove_journal_head(bh);
479 put_bh(bh);
480 } else {
481 jbd_unlock_bh_state(bh);
482 }
483 put_bh(bh);
484 cond_resched_lock(&journal->j_list_lock);
485 }
486 spin_unlock(&journal->j_list_lock); 554 spin_unlock(&journal->j_list_lock);
487 555
488 if (err) 556 if (err)
@@ -656,6 +724,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
656start_journal_io: 724start_journal_io:
657 for (i = 0; i < bufs; i++) { 725 for (i = 0; i < bufs; i++) {
658 struct buffer_head *bh = wbuf[i]; 726 struct buffer_head *bh = wbuf[i];
727 /*
728 * Compute checksum.
729 */
730 if (JBD2_HAS_COMPAT_FEATURE(journal,
731 JBD2_FEATURE_COMPAT_CHECKSUM)) {
732 crc32_sum =
733 jbd2_checksum_data(crc32_sum, bh);
734 }
735
659 lock_buffer(bh); 736 lock_buffer(bh);
660 clear_buffer_dirty(bh); 737 clear_buffer_dirty(bh);
661 set_buffer_uptodate(bh); 738 set_buffer_uptodate(bh);
@@ -672,6 +749,23 @@ start_journal_io:
672 } 749 }
673 } 750 }
674 751
752 /* Done it all: now write the commit record asynchronously. */
753
754 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
755 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
756 err = journal_submit_commit_record(journal, commit_transaction,
757 &cbh, crc32_sum);
758 if (err)
759 __jbd2_journal_abort_hard(journal);
760
761 spin_lock(&journal->j_list_lock);
762 err = journal_wait_on_locked_list(journal,
763 commit_transaction);
764 spin_unlock(&journal->j_list_lock);
765 if (err)
766 __jbd2_journal_abort_hard(journal);
767 }
768
675 /* Lo and behold: we have just managed to send a transaction to 769 /* Lo and behold: we have just managed to send a transaction to
676 the log. Before we can commit it, wait for the IO so far to 770 the log. Before we can commit it, wait for the IO so far to
677 complete. Control buffers being written are on the 771 complete. Control buffers being written are on the
@@ -771,8 +865,14 @@ wait_for_iobuf:
771 865
772 jbd_debug(3, "JBD: commit phase 6\n"); 866 jbd_debug(3, "JBD: commit phase 6\n");
773 867
774 if (journal_write_commit_record(journal, commit_transaction)) 868 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
775 err = -EIO; 869 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
870 err = journal_submit_commit_record(journal, commit_transaction,
871 &cbh, crc32_sum);
872 if (err)
873 __jbd2_journal_abort_hard(journal);
874 }
875 err = journal_wait_on_commit_record(cbh);
776 876
777 if (err) 877 if (err)
778 jbd2_journal_abort(journal, err); 878 jbd2_journal_abort(journal, err);