diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-05-12 13:39:03 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:03 -0400 |
commit | 1259ab75c62462b8ffad90067b5e1f6312786a18 (patch) | |
tree | ccc6b795e524a74b3fb9666409ca2f7f33bf98dd /fs/btrfs/inode.c | |
parent | ca7a79ad8dbe24669a8ced01f9fc0126b872745d (diff) |
Btrfs: Handle write errors on raid1 and raid10
When duplicate copies exist, writes are allowed to fail to one of those
copies. This changeset includes a few changes that allow the FS to
continue even when some IOs fail.
It also adds verification of the parent generation number for btree blocks.
This generation is stored in the pointer to a block, and it ensures
that missed writes are detected.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 66 |
1 files changed, 41 insertions, 25 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a492fd238c88..08760ff9bab7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -430,9 +430,9 @@ struct io_failure_record { | |||
430 | int last_mirror; | 430 | int last_mirror; |
431 | }; | 431 | }; |
432 | 432 | ||
433 | int btrfs_readpage_io_failed_hook(struct bio *failed_bio, | 433 | int btrfs_io_failed_hook(struct bio *failed_bio, |
434 | struct page *page, u64 start, u64 end, | 434 | struct page *page, u64 start, u64 end, |
435 | struct extent_state *state) | 435 | struct extent_state *state) |
436 | { | 436 | { |
437 | struct io_failure_record *failrec = NULL; | 437 | struct io_failure_record *failrec = NULL; |
438 | u64 private; | 438 | u64 private; |
@@ -443,6 +443,7 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, | |||
443 | struct bio *bio; | 443 | struct bio *bio; |
444 | int num_copies; | 444 | int num_copies; |
445 | int ret; | 445 | int ret; |
446 | int rw; | ||
446 | u64 logical; | 447 | u64 logical; |
447 | 448 | ||
448 | ret = get_state_private(failure_tree, start, &private); | 449 | ret = get_state_private(failure_tree, start, &private); |
@@ -505,7 +506,41 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, | |||
505 | bio->bi_bdev = failed_bio->bi_bdev; | 506 | bio->bi_bdev = failed_bio->bi_bdev; |
506 | bio->bi_size = 0; | 507 | bio->bi_size = 0; |
507 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); | 508 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); |
508 | btrfs_submit_bio_hook(inode, READ, bio, failrec->last_mirror); | 509 | if (failed_bio->bi_rw & (1 << BIO_RW)) |
510 | rw = WRITE; | ||
511 | else | ||
512 | rw = READ; | ||
513 | |||
514 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | ||
515 | failrec->last_mirror); | ||
516 | return 0; | ||
517 | } | ||
518 | |||
519 | int btrfs_clean_io_failures(struct inode *inode, u64 start) | ||
520 | { | ||
521 | u64 private; | ||
522 | u64 private_failure; | ||
523 | struct io_failure_record *failure; | ||
524 | int ret; | ||
525 | |||
526 | private = 0; | ||
527 | if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, | ||
528 | (u64)-1, 1, EXTENT_DIRTY)) { | ||
529 | ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, | ||
530 | start, &private_failure); | ||
531 | if (ret == 0) { | ||
532 | failure = (struct io_failure_record *)(unsigned long) | ||
533 | private_failure; | ||
534 | set_state_private(&BTRFS_I(inode)->io_failure_tree, | ||
535 | failure->start, 0); | ||
536 | clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, | ||
537 | failure->start, | ||
538 | failure->start + failure->len - 1, | ||
539 | EXTENT_DIRTY | EXTENT_LOCKED, | ||
540 | GFP_NOFS); | ||
541 | kfree(failure); | ||
542 | } | ||
543 | } | ||
509 | return 0; | 544 | return 0; |
510 | } | 545 | } |
511 | 546 | ||
@@ -547,26 +582,7 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
547 | /* if the io failure tree for this inode is non-empty, | 582 | /* if the io failure tree for this inode is non-empty, |
548 | * check to see if we've recovered from a failed IO | 583 | * check to see if we've recovered from a failed IO |
549 | */ | 584 | */ |
550 | private = 0; | 585 | btrfs_clean_io_failures(inode, start); |
551 | if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, | ||
552 | (u64)-1, 1, EXTENT_DIRTY)) { | ||
553 | u64 private_failure; | ||
554 | struct io_failure_record *failure; | ||
555 | ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, | ||
556 | start, &private_failure); | ||
557 | if (ret == 0) { | ||
558 | failure = (struct io_failure_record *)(unsigned long) | ||
559 | private_failure; | ||
560 | set_state_private(&BTRFS_I(inode)->io_failure_tree, | ||
561 | failure->start, 0); | ||
562 | clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, | ||
563 | failure->start, | ||
564 | failure->start + failure->len - 1, | ||
565 | EXTENT_DIRTY | EXTENT_LOCKED, | ||
566 | GFP_NOFS); | ||
567 | kfree(failure); | ||
568 | } | ||
569 | } | ||
570 | return 0; | 586 | return 0; |
571 | 587 | ||
572 | zeroit: | 588 | zeroit: |
@@ -3657,7 +3673,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
3657 | .merge_bio_hook = btrfs_merge_bio_hook, | 3673 | .merge_bio_hook = btrfs_merge_bio_hook, |
3658 | .readpage_io_hook = btrfs_readpage_io_hook, | 3674 | .readpage_io_hook = btrfs_readpage_io_hook, |
3659 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, | 3675 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, |
3660 | .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, | 3676 | .readpage_io_failed_hook = btrfs_io_failed_hook, |
3661 | .set_bit_hook = btrfs_set_bit_hook, | 3677 | .set_bit_hook = btrfs_set_bit_hook, |
3662 | .clear_bit_hook = btrfs_clear_bit_hook, | 3678 | .clear_bit_hook = btrfs_clear_bit_hook, |
3663 | }; | 3679 | }; |