diff options
author | Girish Shilamkar <girish@clusterfs.com> | 2008-01-28 23:58:27 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2008-01-28 23:58:27 -0500 |
commit | 818d276ceb83aa9fdebb5e0a53188290312de987 (patch) | |
tree | de3fb4ffadd72caea2876c5232ce76cd14b3646e /fs/jbd2/recovery.c | |
parent | 8e85fb3f305b24b79c6d9cb7a56d22b062335ad3 (diff) |
ext4: Add the journal checksum feature
The journal checksum feature adds two new flags i.e
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT and JBD2_FEATURE_COMPAT_CHECKSUM.
JBD2_FEATURE_CHECKSUM flag indicates that the commit block contains the
checksum for the blocks described by the descriptor blocks.
Due to checksums, writing of the commit record no longer needs to be
synchronous. Now commit record can be sent to disk without waiting for
descriptor blocks to be written to disk. This behavior is controlled
using JBD2_FEATURE_ASYNC_COMMIT flag. Older kernels/e2fsck should not be
able to recover the journal with _ASYNC_COMMIT hence it is made
incompat.
The commit header has been extended to hold the checksum along with the
type of the checksum.
For recovery in pass scan checksums are verified to ensure the sanity
and completeness(in case of _ASYNC_COMMIT) of every transaction.
Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Girish Shilamkar <girish@clusterfs.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Diffstat (limited to 'fs/jbd2/recovery.c')
-rw-r--r-- | fs/jbd2/recovery.c | 151 |
1 files changed, 143 insertions, 8 deletions
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index d0ce627539ef..921680663fa2 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/jbd2.h> | 21 | #include <linux/jbd2.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/crc32.h> | ||
24 | #endif | 25 | #endif |
25 | 26 | ||
26 | /* | 27 | /* |
@@ -316,6 +317,37 @@ static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag | |||
316 | return block; | 317 | return block; |
317 | } | 318 | } |
318 | 319 | ||
320 | /* | ||
321 | * calc_chksums calculates the checksums for the blocks described in the | ||
322 | * descriptor block. | ||
323 | */ | ||
324 | static int calc_chksums(journal_t *journal, struct buffer_head *bh, | ||
325 | unsigned long *next_log_block, __u32 *crc32_sum) | ||
326 | { | ||
327 | int i, num_blks, err; | ||
328 | unsigned long io_block; | ||
329 | struct buffer_head *obh; | ||
330 | |||
331 | num_blks = count_tags(journal, bh); | ||
332 | /* Calculate checksum of the descriptor block. */ | ||
333 | *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); | ||
334 | |||
335 | for (i = 0; i < num_blks; i++) { | ||
336 | io_block = (*next_log_block)++; | ||
337 | wrap(journal, *next_log_block); | ||
338 | err = jread(&obh, journal, io_block); | ||
339 | if (err) { | ||
340 | printk(KERN_ERR "JBD: IO error %d recovering block " | ||
341 | "%lu in log\n", err, io_block); | ||
342 | return 1; | ||
343 | } else { | ||
344 | *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, | ||
345 | obh->b_size); | ||
346 | } | ||
347 | } | ||
348 | return 0; | ||
349 | } | ||
350 | |||
319 | static int do_one_pass(journal_t *journal, | 351 | static int do_one_pass(journal_t *journal, |
320 | struct recovery_info *info, enum passtype pass) | 352 | struct recovery_info *info, enum passtype pass) |
321 | { | 353 | { |
@@ -328,6 +360,7 @@ static int do_one_pass(journal_t *journal, | |||
328 | unsigned int sequence; | 360 | unsigned int sequence; |
329 | int blocktype; | 361 | int blocktype; |
330 | int tag_bytes = journal_tag_bytes(journal); | 362 | int tag_bytes = journal_tag_bytes(journal); |
363 | __u32 crc32_sum = ~0; /* Transactional Checksums */ | ||
331 | 364 | ||
332 | /* Precompute the maximum metadata descriptors in a descriptor block */ | 365 | /* Precompute the maximum metadata descriptors in a descriptor block */ |
333 | int MAX_BLOCKS_PER_DESC; | 366 | int MAX_BLOCKS_PER_DESC; |
@@ -419,12 +452,26 @@ static int do_one_pass(journal_t *journal, | |||
419 | switch(blocktype) { | 452 | switch(blocktype) { |
420 | case JBD2_DESCRIPTOR_BLOCK: | 453 | case JBD2_DESCRIPTOR_BLOCK: |
421 | /* If it is a valid descriptor block, replay it | 454 | /* If it is a valid descriptor block, replay it |
422 | * in pass REPLAY; otherwise, just skip over the | 455 | * in pass REPLAY; if journal_checksums enabled, then |
423 | * blocks it describes. */ | 456 | * calculate checksums in PASS_SCAN, otherwise, |
457 | * just skip over the blocks it describes. */ | ||
424 | if (pass != PASS_REPLAY) { | 458 | if (pass != PASS_REPLAY) { |
459 | if (pass == PASS_SCAN && | ||
460 | JBD2_HAS_COMPAT_FEATURE(journal, | ||
461 | JBD2_FEATURE_COMPAT_CHECKSUM) && | ||
462 | !info->end_transaction) { | ||
463 | if (calc_chksums(journal, bh, | ||
464 | &next_log_block, | ||
465 | &crc32_sum)) { | ||
466 | put_bh(bh); | ||
467 | break; | ||
468 | } | ||
469 | put_bh(bh); | ||
470 | continue; | ||
471 | } | ||
425 | next_log_block += count_tags(journal, bh); | 472 | next_log_block += count_tags(journal, bh); |
426 | wrap(journal, next_log_block); | 473 | wrap(journal, next_log_block); |
427 | brelse(bh); | 474 | put_bh(bh); |
428 | continue; | 475 | continue; |
429 | } | 476 | } |
430 | 477 | ||
@@ -516,9 +563,96 @@ static int do_one_pass(journal_t *journal, | |||
516 | continue; | 563 | continue; |
517 | 564 | ||
518 | case JBD2_COMMIT_BLOCK: | 565 | case JBD2_COMMIT_BLOCK: |
519 | /* Found an expected commit block: not much to | 566 | /* How to differentiate between interrupted commit |
520 | * do other than move on to the next sequence | 567 | * and journal corruption ? |
568 | * | ||
569 | * {nth transaction} | ||
570 | * Checksum Verification Failed | ||
571 | * | | ||
572 | * ____________________ | ||
573 | * | | | ||
574 | * async_commit sync_commit | ||
575 | * | | | ||
576 | * | GO TO NEXT "Journal Corruption" | ||
577 | * | TRANSACTION | ||
578 | * | | ||
579 | * {(n+1)th transanction} | ||
580 | * | | ||
581 | * _______|______________ | ||
582 | * | | | ||
583 | * Commit block found Commit block not found | ||
584 | * | | | ||
585 | * "Journal Corruption" | | ||
586 | * _____________|_________ | ||
587 | * | | | ||
588 | * nth trans corrupt OR nth trans | ||
589 | * and (n+1)th interrupted interrupted | ||
590 | * before commit block | ||
591 | * could reach the disk. | ||
592 | * (Cannot find the difference in above | ||
593 | * mentioned conditions. Hence assume | ||
594 | * "Interrupted Commit".) | ||
595 | */ | ||
596 | |||
597 | /* Found an expected commit block: if checksums | ||
598 | * are present verify them in PASS_SCAN; else not | ||
599 | * much to do other than move on to the next sequence | ||
521 | * number. */ | 600 | * number. */ |
601 | if (pass == PASS_SCAN && | ||
602 | JBD2_HAS_COMPAT_FEATURE(journal, | ||
603 | JBD2_FEATURE_COMPAT_CHECKSUM)) { | ||
604 | int chksum_err, chksum_seen; | ||
605 | struct commit_header *cbh = | ||
606 | (struct commit_header *)bh->b_data; | ||
607 | unsigned found_chksum = | ||
608 | be32_to_cpu(cbh->h_chksum[0]); | ||
609 | |||
610 | chksum_err = chksum_seen = 0; | ||
611 | |||
612 | if (info->end_transaction) { | ||
613 | printk(KERN_ERR "JBD: Transaction %u " | ||
614 | "found to be corrupt.\n", | ||
615 | next_commit_ID - 1); | ||
616 | brelse(bh); | ||
617 | break; | ||
618 | } | ||
619 | |||
620 | if (crc32_sum == found_chksum && | ||
621 | cbh->h_chksum_type == JBD2_CRC32_CHKSUM && | ||
622 | cbh->h_chksum_size == | ||
623 | JBD2_CRC32_CHKSUM_SIZE) | ||
624 | chksum_seen = 1; | ||
625 | else if (!(cbh->h_chksum_type == 0 && | ||
626 | cbh->h_chksum_size == 0 && | ||
627 | found_chksum == 0 && | ||
628 | !chksum_seen)) | ||
629 | /* | ||
630 | * If fs is mounted using an old kernel and then | ||
631 | * kernel with journal_chksum is used then we | ||
632 | * get a situation where the journal flag has | ||
633 | * checksum flag set but checksums are not | ||
634 | * present i.e chksum = 0, in the individual | ||
635 | * commit blocks. | ||
636 | * Hence to avoid checksum failures, in this | ||
637 | * situation, this extra check is added. | ||
638 | */ | ||
639 | chksum_err = 1; | ||
640 | |||
641 | if (chksum_err) { | ||
642 | info->end_transaction = next_commit_ID; | ||
643 | |||
644 | if (!JBD2_HAS_COMPAT_FEATURE(journal, | ||
645 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ | ||
646 | printk(KERN_ERR | ||
647 | "JBD: Transaction %u " | ||
648 | "found to be corrupt.\n", | ||
649 | next_commit_ID); | ||
650 | brelse(bh); | ||
651 | break; | ||
652 | } | ||
653 | } | ||
654 | crc32_sum = ~0; | ||
655 | } | ||
522 | brelse(bh); | 656 | brelse(bh); |
523 | next_commit_ID++; | 657 | next_commit_ID++; |
524 | continue; | 658 | continue; |
@@ -554,9 +688,10 @@ static int do_one_pass(journal_t *journal, | |||
554 | * transaction marks the end of the valid log. | 688 | * transaction marks the end of the valid log. |
555 | */ | 689 | */ |
556 | 690 | ||
557 | if (pass == PASS_SCAN) | 691 | if (pass == PASS_SCAN) { |
558 | info->end_transaction = next_commit_ID; | 692 | if (!info->end_transaction) |
559 | else { | 693 | info->end_transaction = next_commit_ID; |
694 | } else { | ||
560 | /* It's really bad news if different passes end up at | 695 | /* It's really bad news if different passes end up at |
561 | * different places (but possible due to IO errors). */ | 696 | * different places (but possible due to IO errors). */ |
562 | if (info->end_transaction != next_commit_ID) { | 697 | if (info->end_transaction != next_commit_ID) { |