aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2/recovery.c
diff options
context:
space:
mode:
authorGirish Shilamkar <girish@clusterfs.com>2008-01-28 23:58:27 -0500
committerTheodore Ts'o <tytso@mit.edu>2008-01-28 23:58:27 -0500
commit818d276ceb83aa9fdebb5e0a53188290312de987 (patch)
treede3fb4ffadd72caea2876c5232ce76cd14b3646e /fs/jbd2/recovery.c
parent8e85fb3f305b24b79c6d9cb7a56d22b062335ad3 (diff)
ext4: Add the journal checksum feature
The journal checksum feature adds two new flags, i.e. JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT and JBD2_FEATURE_COMPAT_CHECKSUM. The JBD2_FEATURE_COMPAT_CHECKSUM flag indicates that the commit block contains the checksum for the blocks described by the descriptor blocks. Due to checksums, writing of the commit record no longer needs to be synchronous. Now the commit record can be sent to disk without waiting for descriptor blocks to be written to disk. This behavior is controlled using the JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT flag. Older kernels/e2fsck should not be able to recover the journal with _ASYNC_COMMIT, hence it is made incompat. The commit header has been extended to hold the checksum along with the type of the checksum. During recovery, checksums are verified in PASS_SCAN to ensure the sanity and completeness (in case of _ASYNC_COMMIT) of every transaction. Signed-off-by: Andreas Dilger <adilger@clusterfs.com> Signed-off-by: Girish Shilamkar <girish@clusterfs.com> Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Diffstat (limited to 'fs/jbd2/recovery.c')
-rw-r--r--fs/jbd2/recovery.c151
1 files changed, 143 insertions, 8 deletions
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index d0ce627539ef..921680663fa2 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -21,6 +21,7 @@
21#include <linux/jbd2.h> 21#include <linux/jbd2.h>
22#include <linux/errno.h> 22#include <linux/errno.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/crc32.h>
24#endif 25#endif
25 26
26/* 27/*
@@ -316,6 +317,37 @@ static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag
316 return block; 317 return block;
317} 318}
318 319
320/*
321 * calc_chksums calculates the checksums for the blocks described in the
322 * descriptor block.
323 */
324static int calc_chksums(journal_t *journal, struct buffer_head *bh,
325 unsigned long *next_log_block, __u32 *crc32_sum)
326{
327 int i, num_blks, err;
328 unsigned long io_block;
329 struct buffer_head *obh;
330
331 num_blks = count_tags(journal, bh);
332 /* Calculate checksum of the descriptor block. */
333 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
334
335 for (i = 0; i < num_blks; i++) {
336 io_block = (*next_log_block)++;
337 wrap(journal, *next_log_block);
338 err = jread(&obh, journal, io_block);
339 if (err) {
340 printk(KERN_ERR "JBD: IO error %d recovering block "
341 "%lu in log\n", err, io_block);
342 return 1;
343 } else {
344 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
345 obh->b_size);
346 }
347 }
348 return 0;
349}
350
319static int do_one_pass(journal_t *journal, 351static int do_one_pass(journal_t *journal,
320 struct recovery_info *info, enum passtype pass) 352 struct recovery_info *info, enum passtype pass)
321{ 353{
@@ -328,6 +360,7 @@ static int do_one_pass(journal_t *journal,
328 unsigned int sequence; 360 unsigned int sequence;
329 int blocktype; 361 int blocktype;
330 int tag_bytes = journal_tag_bytes(journal); 362 int tag_bytes = journal_tag_bytes(journal);
363 __u32 crc32_sum = ~0; /* Transactional Checksums */
331 364
332 /* Precompute the maximum metadata descriptors in a descriptor block */ 365 /* Precompute the maximum metadata descriptors in a descriptor block */
333 int MAX_BLOCKS_PER_DESC; 366 int MAX_BLOCKS_PER_DESC;
@@ -419,12 +452,26 @@ static int do_one_pass(journal_t *journal,
419 switch(blocktype) { 452 switch(blocktype) {
420 case JBD2_DESCRIPTOR_BLOCK: 453 case JBD2_DESCRIPTOR_BLOCK:
421 /* If it is a valid descriptor block, replay it 454 /* If it is a valid descriptor block, replay it
422 * in pass REPLAY; otherwise, just skip over the 455 * in pass REPLAY; if journal_checksums enabled, then
423 * blocks it describes. */ 456 * calculate checksums in PASS_SCAN, otherwise,
457 * just skip over the blocks it describes. */
424 if (pass != PASS_REPLAY) { 458 if (pass != PASS_REPLAY) {
459 if (pass == PASS_SCAN &&
460 JBD2_HAS_COMPAT_FEATURE(journal,
461 JBD2_FEATURE_COMPAT_CHECKSUM) &&
462 !info->end_transaction) {
463 if (calc_chksums(journal, bh,
464 &next_log_block,
465 &crc32_sum)) {
466 put_bh(bh);
467 break;
468 }
469 put_bh(bh);
470 continue;
471 }
425 next_log_block += count_tags(journal, bh); 472 next_log_block += count_tags(journal, bh);
426 wrap(journal, next_log_block); 473 wrap(journal, next_log_block);
427 brelse(bh); 474 put_bh(bh);
428 continue; 475 continue;
429 } 476 }
430 477
@@ -516,9 +563,96 @@ static int do_one_pass(journal_t *journal,
516 continue; 563 continue;
517 564
518 case JBD2_COMMIT_BLOCK: 565 case JBD2_COMMIT_BLOCK:
519 /* Found an expected commit block: not much to 566 /* How to differentiate between interrupted commit
520 * do other than move on to the next sequence 567 * and journal corruption ?
568 *
569 * {nth transaction}
570 * Checksum Verification Failed
571 * |
572 * ____________________
573 * | |
574 * async_commit sync_commit
575 * | |
576 * | GO TO NEXT "Journal Corruption"
577 * | TRANSACTION
578 * |
 579 * {(n+1)th transaction}
580 * |
581 * _______|______________
582 * | |
583 * Commit block found Commit block not found
584 * | |
585 * "Journal Corruption" |
586 * _____________|_________
587 * | |
588 * nth trans corrupt OR nth trans
589 * and (n+1)th interrupted interrupted
590 * before commit block
591 * could reach the disk.
592 * (Cannot find the difference in above
593 * mentioned conditions. Hence assume
594 * "Interrupted Commit".)
595 */
596
597 /* Found an expected commit block: if checksums
598 * are present verify them in PASS_SCAN; else not
599 * much to do other than move on to the next sequence
521 * number. */ 600 * number. */
601 if (pass == PASS_SCAN &&
602 JBD2_HAS_COMPAT_FEATURE(journal,
603 JBD2_FEATURE_COMPAT_CHECKSUM)) {
604 int chksum_err, chksum_seen;
605 struct commit_header *cbh =
606 (struct commit_header *)bh->b_data;
607 unsigned found_chksum =
608 be32_to_cpu(cbh->h_chksum[0]);
609
610 chksum_err = chksum_seen = 0;
611
612 if (info->end_transaction) {
613 printk(KERN_ERR "JBD: Transaction %u "
614 "found to be corrupt.\n",
615 next_commit_ID - 1);
616 brelse(bh);
617 break;
618 }
619
620 if (crc32_sum == found_chksum &&
621 cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
622 cbh->h_chksum_size ==
623 JBD2_CRC32_CHKSUM_SIZE)
624 chksum_seen = 1;
625 else if (!(cbh->h_chksum_type == 0 &&
626 cbh->h_chksum_size == 0 &&
627 found_chksum == 0 &&
628 !chksum_seen))
629 /*
630 * If fs is mounted using an old kernel and then
631 * kernel with journal_chksum is used then we
632 * get a situation where the journal flag has
633 * checksum flag set but checksums are not
634 * present i.e chksum = 0, in the individual
635 * commit blocks.
636 * Hence to avoid checksum failures, in this
637 * situation, this extra check is added.
638 */
639 chksum_err = 1;
640
641 if (chksum_err) {
642 info->end_transaction = next_commit_ID;
643
644 if (!JBD2_HAS_COMPAT_FEATURE(journal,
645 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
646 printk(KERN_ERR
647 "JBD: Transaction %u "
648 "found to be corrupt.\n",
649 next_commit_ID);
650 brelse(bh);
651 break;
652 }
653 }
654 crc32_sum = ~0;
655 }
522 brelse(bh); 656 brelse(bh);
523 next_commit_ID++; 657 next_commit_ID++;
524 continue; 658 continue;
@@ -554,9 +688,10 @@ static int do_one_pass(journal_t *journal,
554 * transaction marks the end of the valid log. 688 * transaction marks the end of the valid log.
555 */ 689 */
556 690
557 if (pass == PASS_SCAN) 691 if (pass == PASS_SCAN) {
558 info->end_transaction = next_commit_ID; 692 if (!info->end_transaction)
559 else { 693 info->end_transaction = next_commit_ID;
694 } else {
560 /* It's really bad news if different passes end up at 695 /* It's really bad news if different passes end up at
561 * different places (but possible due to IO errors). */ 696 * different places (but possible due to IO errors). */
562 if (info->end_transaction != next_commit_ID) { 697 if (info->end_transaction != next_commit_ID) {