author     NeilBrown <neilb@suse.de>	2012-05-20 19:27:00 -0400
committer  NeilBrown <neilb@suse.de>	2012-05-20 19:27:00 -0400
commit     05616be5e11f66888b66554957dbecdd90658a84 (patch)
tree       e1d5607aa30926f7cd4b82b60c32f277c9aff39e /drivers/md/raid5.c
parent     c6563a8c38fde3c1c7fc925a10bde3ca20799301 (diff)
md/raid5: Use correct data_offset for all IO.
As there can now be two different data_offsets - an 'old' and
a 'new' - we need to carefully choose between them.
Signed-off-by: NeilBrown <neilb@suse.de>
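The decision the patch adds is easy to mis-read in diff form, so here is the same rule reduced to a standalone sketch. This is plain userspace C with the r5conf/stripe_head fields passed as parameters and the smp_rmb() pairing omitted (a single-threaded illustration does not need it); the helper name is invented, and only the logic mirrors the use_new_offset() introduced below:

```c
#include <stdint.h>
#include <stdio.h>

#define MaxSector (~(uint64_t)0)	/* same sentinel md uses */

/* Sketch of the use_new_offset() decision, with conf->reshape_progress,
 * conf->generation and sh->generation passed in directly.  A stripe
 * stamped with the previous generation predates the reshape and must
 * keep the old offset; any newer stripe, while a reshape is running,
 * uses the new one.
 */
static int use_new_offset_sketch(uint64_t reshape_progress,
				 int conf_generation, int sh_generation)
{
	if (reshape_progress == MaxSector)
		return 0;	/* no reshape running: old offset */
	if (sh_generation == conf_generation - 1)
		return 0;	/* old-generation stripe: old offset */
	return 1;		/* new-generation stripe: new offset */
}

int main(void)
{
	/* Mid-reshape (progress != MaxSector), generation now 5: a
	 * generation-4 stripe stays on the old offset, a generation-5
	 * stripe moves to the new one.
	 */
	printf("%d %d\n",
	       use_new_offset_sketch(1024, 5, 4),	/* prints 0 */
	       use_new_offset_sketch(1024, 5, 5));	/* prints 1 */
	return 0;
}
```

The key property is that a stripe_head stamped with the previous generation always resolves to the old offset, even while a reshape is in flight.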
Diffstat (limited to 'drivers/md/raid5.c')

-rw-r--r--	drivers/md/raid5.c	72
1 file changed, 59 insertions(+), 13 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3705585d7567..71d1de909ba5 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -488,6 +488,27 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 	return sh;
 }
 
+/* Determine if 'data_offset' or 'new_data_offset' should be used
+ * in this stripe_head.
+ */
+static int use_new_offset(struct r5conf *conf, struct stripe_head *sh)
+{
+	sector_t progress = conf->reshape_progress;
+	/* Need a memory barrier to make sure we see the value
+	 * of conf->generation, or ->data_offset that was set before
+	 * reshape_progress was updated.
+	 */
+	smp_rmb();
+	if (progress == MaxSector)
+		return 0;
+	if (sh->generation == conf->generation - 1)
+		return 0;
+	/* We are in a reshape, and this is a new-generation stripe,
+	 * so use new_data_offset.
+	 */
+	return 1;
+}
+
 static void
 raid5_end_read_request(struct bio *bi, int error);
 static void
@@ -603,7 +624,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 				__func__, (unsigned long long)sh->sector,
 				bi->bi_rw, i);
 			atomic_inc(&sh->count);
-			bi->bi_sector = sh->sector + rdev->data_offset;
+			if (use_new_offset(conf, sh))
+				bi->bi_sector = (sh->sector
+						 + rdev->new_data_offset);
+			else
+				bi->bi_sector = (sh->sector
+						 + rdev->data_offset);
 			bi->bi_flags = 1 << BIO_UPTODATE;
 			bi->bi_idx = 0;
 			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -627,7 +653,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 				__func__, (unsigned long long)sh->sector,
 				rbi->bi_rw, i);
 			atomic_inc(&sh->count);
-			rbi->bi_sector = sh->sector + rrdev->data_offset;
+			if (use_new_offset(conf, sh))
+				rbi->bi_sector = (sh->sector
+						  + rrdev->new_data_offset);
+			else
+				rbi->bi_sector = (sh->sector
+						  + rrdev->data_offset);
 			rbi->bi_flags = 1 << BIO_UPTODATE;
 			rbi->bi_idx = 0;
 			rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -1648,7 +1679,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
 	char b[BDEVNAME_SIZE];
 	struct md_rdev *rdev = NULL;
-
+	sector_t s;
 
 	for (i=0 ; i<disks; i++)
 		if (bi == &sh->dev[i].req)
@@ -1671,6 +1702,10 @@ static void raid5_end_read_request(struct bio * bi, int error)
 	if (!rdev)
 		rdev = conf->disks[i].rdev;
 
+	if (use_new_offset(conf, sh))
+		s = sh->sector + rdev->new_data_offset;
+	else
+		s = sh->sector + rdev->data_offset;
 	if (uptodate) {
 		set_bit(R5_UPTODATE, &sh->dev[i].flags);
 		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
@@ -1683,8 +1718,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				"md/raid:%s: read error corrected"
 				" (%lu sectors at %llu on %s)\n",
 				mdname(conf->mddev), STRIPE_SECTORS,
-				(unsigned long long)(sh->sector
-						     + rdev->data_offset),
+				(unsigned long long)s,
 				bdevname(rdev->bdev, b));
 			atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
@@ -1704,8 +1738,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				"md/raid:%s: read error on replacement device "
 				"(sector %llu on %s).\n",
 				mdname(conf->mddev),
-				(unsigned long long)(sh->sector
-						     + rdev->data_offset),
+				(unsigned long long)s,
 				bdn);
 		else if (conf->mddev->degraded >= conf->max_degraded)
 			printk_ratelimited(
@@ -1713,8 +1746,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				"md/raid:%s: read error not correctable "
 				"(sector %llu on %s).\n",
 				mdname(conf->mddev),
-				(unsigned long long)(sh->sector
-						     + rdev->data_offset),
+				(unsigned long long)s,
 				bdn);
 		else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
 			/* Oh, no!!! */
@@ -1723,8 +1755,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				"md/raid:%s: read error NOT corrected!! "
 				"(sector %llu on %s).\n",
 				mdname(conf->mddev),
-				(unsigned long long)(sh->sector
-						     + rdev->data_offset),
+				(unsigned long long)s,
 				bdn);
 		else if (atomic_read(&rdev->read_errors)
 			 > conf->max_nr_stripes)
@@ -3842,6 +3873,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		raid_bio->bi_next = (void*)rdev;
 		align_bi->bi_bdev = rdev->bdev;
 		align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
+		/* No reshape active, so we can trust rdev->data_offset */
 		align_bi->bi_sector += rdev->data_offset;
 
 		if (!bio_fits_rdev(align_bi) ||
@@ -5182,9 +5214,12 @@ static int run(struct mddev *mddev)
 		blk_queue_io_opt(mddev->queue, chunk_size *
 				 (conf->raid_disks - conf->max_degraded));
 
-		rdev_for_each(rdev, mddev)
+		rdev_for_each(rdev, mddev) {
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);
+			disk_stack_limits(mddev->gendisk, rdev->bdev,
+					  rdev->new_data_offset << 9);
+		}
 	}
 
 	return 0;
@@ -5539,12 +5574,16 @@ static int raid5_start_reshape(struct mddev *mddev)
 	conf->chunk_sectors = mddev->new_chunk_sectors;
 	conf->prev_algo = conf->algorithm;
 	conf->algorithm = mddev->new_layout;
+	conf->generation++;
+	/* Code that selects data_offset needs to see the generation update
+	 * if reshape_progress has been set - so a memory barrier needed.
+	 */
+	smp_mb();
 	if (mddev->reshape_backwards)
 		conf->reshape_progress = raid5_size(mddev, 0, 0);
 	else
 		conf->reshape_progress = 0;
 	conf->reshape_safe = conf->reshape_progress;
-	conf->generation++;
 	spin_unlock_irq(&conf->device_lock);
 
 	/* Add some new drives, as many as will fit.
@@ -5596,6 +5635,9 @@ static int raid5_start_reshape(struct mddev *mddev)
 		mddev->recovery = 0;
 		spin_lock_irq(&conf->device_lock);
 		mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
+		rdev_for_each(rdev, mddev)
+			rdev->new_data_offset = rdev->data_offset;
+		smp_wmb();
 		conf->reshape_progress = MaxSector;
 		mddev->reshape_position = MaxSector;
 		spin_unlock_irq(&conf->device_lock);
@@ -5614,9 +5656,13 @@ static void end_reshape(struct r5conf *conf)
 {
 
 	if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
+		struct md_rdev *rdev;
 
 		spin_lock_irq(&conf->device_lock);
 		conf->previous_raid_disks = conf->raid_disks;
+		rdev_for_each(rdev, conf->mddev)
+			rdev->data_offset = rdev->new_data_offset;
+		smp_wmb();
 		conf->reshape_progress = MaxSector;
 		spin_unlock_irq(&conf->device_lock);
 		wake_up(&conf->wait_for_overlap);
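A note on how the barriers in this patch pair up: raid5_start_reshape() bumps conf->generation and issues smp_mb() before publishing conf->reshape_progress; end_reshape() and the raid5_start_reshape() error path update the per-rdev offsets and issue smp_wmb() before writing reshape_progress back to MaxSector; use_new_offset() loads reshape_progress first and issues smp_rmb() before trusting conf->generation or the offsets. Below is a minimal C11 model of the first of those pairings, with atomic_thread_fence() standing in for the kernel's smp_* primitives; the variable layout and thread setup are invented for illustration, and the mapping onto the kernel memory model is approximate:

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define MaxSector (~(uint64_t)0)

static _Atomic uint64_t reshape_progress = MaxSector;
static _Atomic int generation = 1;

/* Writer side, modelling raid5_start_reshape(): the generation bump
 * must be visible before reshape_progress changes, hence the full
 * fence in place of smp_mb().
 */
static void *start_reshape(void *arg)
{
	(void)arg;
	atomic_store_explicit(&generation, 2, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb() */
	atomic_store_explicit(&reshape_progress, 0, memory_order_relaxed);
	return NULL;
}

/* Reader side, modelling use_new_offset(): load reshape_progress,
 * then fence (smp_rmb()), and only then look at generation.
 */
static int use_new_offset_model(int sh_generation)
{
	uint64_t progress =
		atomic_load_explicit(&reshape_progress, memory_order_relaxed);
	atomic_thread_fence(memory_order_acquire);	/* smp_rmb() */
	if (progress == MaxSector)
		return 0;		/* no reshape: old offset */
	if (sh_generation ==
	    atomic_load_explicit(&generation, memory_order_relaxed) - 1)
		return 0;		/* old-generation stripe */
	return 1;			/* new offset */
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, start_reshape, NULL);
	/* A stripe created before the reshape was stamped generation 1;
	 * whichever side of the race the reader lands on, it must
	 * answer 0 for that stripe.
	 */
	printf("gen-1 stripe uses new offset? %d\n", use_new_offset_model(1));
	pthread_join(t, NULL);
	return 0;
}
```

The full fence on the writer side mirrors smp_mb(): any reader that observes reshape_progress != MaxSector is guaranteed to also observe the incremented generation, which is exactly what keeps old-generation stripes on the old data_offset.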