aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author    NeilBrown <neilb@suse.de>  2012-05-20 19:27:00 -0400
committer NeilBrown <neilb@suse.de>  2012-05-20 19:27:00 -0400
commit 05616be5e11f66888b66554957dbecdd90658a84 (patch)
tree   e1d5607aa30926f7cd4b82b60c32f277c9aff39e
parent c6563a8c38fde3c1c7fc925a10bde3ca20799301 (diff)
md/raid5: Use correct data_offset for all IO.
As there can now be two different data_offsets - an 'old' and a 'new' - we need to carefully choose between them. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--  drivers/md/raid5.c | 72 +++++++++++++++++++++++++++++++++++++-----------
1 file changed, 59 insertions(+), 13 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3705585d7567..71d1de909ba5 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -488,6 +488,27 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
488 return sh; 488 return sh;
489} 489}
490 490
491/* Determine if 'data_offset' or 'new_data_offset' should be used
492 * in this stripe_head.
493 */
494static int use_new_offset(struct r5conf *conf, struct stripe_head *sh)
495{
496 sector_t progress = conf->reshape_progress;
497 /* Need a memory barrier to make sure we see the value
498 * of conf->generation, or ->data_offset that was set before
499 * reshape_progress was updated.
500 */
501 smp_rmb();
502 if (progress == MaxSector)
503 return 0;
504 if (sh->generation == conf->generation - 1)
505 return 0;
506 /* We are in a reshape, and this is a new-generation stripe,
507 * so use new_data_offset.
508 */
509 return 1;
510}
511
491static void 512static void
492raid5_end_read_request(struct bio *bi, int error); 513raid5_end_read_request(struct bio *bi, int error);
493static void 514static void
@@ -603,7 +624,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
603 __func__, (unsigned long long)sh->sector, 624 __func__, (unsigned long long)sh->sector,
604 bi->bi_rw, i); 625 bi->bi_rw, i);
605 atomic_inc(&sh->count); 626 atomic_inc(&sh->count);
606 bi->bi_sector = sh->sector + rdev->data_offset; 627 if (use_new_offset(conf, sh))
628 bi->bi_sector = (sh->sector
629 + rdev->new_data_offset);
630 else
631 bi->bi_sector = (sh->sector
632 + rdev->data_offset);
607 bi->bi_flags = 1 << BIO_UPTODATE; 633 bi->bi_flags = 1 << BIO_UPTODATE;
608 bi->bi_idx = 0; 634 bi->bi_idx = 0;
609 bi->bi_io_vec[0].bv_len = STRIPE_SIZE; 635 bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -627,7 +653,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
627 __func__, (unsigned long long)sh->sector, 653 __func__, (unsigned long long)sh->sector,
628 rbi->bi_rw, i); 654 rbi->bi_rw, i);
629 atomic_inc(&sh->count); 655 atomic_inc(&sh->count);
630 rbi->bi_sector = sh->sector + rrdev->data_offset; 656 if (use_new_offset(conf, sh))
657 rbi->bi_sector = (sh->sector
658 + rrdev->new_data_offset);
659 else
660 rbi->bi_sector = (sh->sector
661 + rrdev->data_offset);
631 rbi->bi_flags = 1 << BIO_UPTODATE; 662 rbi->bi_flags = 1 << BIO_UPTODATE;
632 rbi->bi_idx = 0; 663 rbi->bi_idx = 0;
633 rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; 664 rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -1648,7 +1679,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
1648 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); 1679 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
1649 char b[BDEVNAME_SIZE]; 1680 char b[BDEVNAME_SIZE];
1650 struct md_rdev *rdev = NULL; 1681 struct md_rdev *rdev = NULL;
1651 1682 sector_t s;
1652 1683
1653 for (i=0 ; i<disks; i++) 1684 for (i=0 ; i<disks; i++)
1654 if (bi == &sh->dev[i].req) 1685 if (bi == &sh->dev[i].req)
@@ -1671,6 +1702,10 @@ static void raid5_end_read_request(struct bio * bi, int error)
1671 if (!rdev) 1702 if (!rdev)
1672 rdev = conf->disks[i].rdev; 1703 rdev = conf->disks[i].rdev;
1673 1704
1705 if (use_new_offset(conf, sh))
1706 s = sh->sector + rdev->new_data_offset;
1707 else
1708 s = sh->sector + rdev->data_offset;
1674 if (uptodate) { 1709 if (uptodate) {
1675 set_bit(R5_UPTODATE, &sh->dev[i].flags); 1710 set_bit(R5_UPTODATE, &sh->dev[i].flags);
1676 if (test_bit(R5_ReadError, &sh->dev[i].flags)) { 1711 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
@@ -1683,8 +1718,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
1683 "md/raid:%s: read error corrected" 1718 "md/raid:%s: read error corrected"
1684 " (%lu sectors at %llu on %s)\n", 1719 " (%lu sectors at %llu on %s)\n",
1685 mdname(conf->mddev), STRIPE_SECTORS, 1720 mdname(conf->mddev), STRIPE_SECTORS,
1686 (unsigned long long)(sh->sector 1721 (unsigned long long)s,
1687 + rdev->data_offset),
1688 bdevname(rdev->bdev, b)); 1722 bdevname(rdev->bdev, b));
1689 atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); 1723 atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
1690 clear_bit(R5_ReadError, &sh->dev[i].flags); 1724 clear_bit(R5_ReadError, &sh->dev[i].flags);
@@ -1704,8 +1738,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
1704 "md/raid:%s: read error on replacement device " 1738 "md/raid:%s: read error on replacement device "
1705 "(sector %llu on %s).\n", 1739 "(sector %llu on %s).\n",
1706 mdname(conf->mddev), 1740 mdname(conf->mddev),
1707 (unsigned long long)(sh->sector 1741 (unsigned long long)s,
1708 + rdev->data_offset),
1709 bdn); 1742 bdn);
1710 else if (conf->mddev->degraded >= conf->max_degraded) 1743 else if (conf->mddev->degraded >= conf->max_degraded)
1711 printk_ratelimited( 1744 printk_ratelimited(
@@ -1713,8 +1746,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
1713 "md/raid:%s: read error not correctable " 1746 "md/raid:%s: read error not correctable "
1714 "(sector %llu on %s).\n", 1747 "(sector %llu on %s).\n",
1715 mdname(conf->mddev), 1748 mdname(conf->mddev),
1716 (unsigned long long)(sh->sector 1749 (unsigned long long)s,
1717 + rdev->data_offset),
1718 bdn); 1750 bdn);
1719 else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) 1751 else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
1720 /* Oh, no!!! */ 1752 /* Oh, no!!! */
@@ -1723,8 +1755,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
1723 "md/raid:%s: read error NOT corrected!! " 1755 "md/raid:%s: read error NOT corrected!! "
1724 "(sector %llu on %s).\n", 1756 "(sector %llu on %s).\n",
1725 mdname(conf->mddev), 1757 mdname(conf->mddev),
1726 (unsigned long long)(sh->sector 1758 (unsigned long long)s,
1727 + rdev->data_offset),
1728 bdn); 1759 bdn);
1729 else if (atomic_read(&rdev->read_errors) 1760 else if (atomic_read(&rdev->read_errors)
1730 > conf->max_nr_stripes) 1761 > conf->max_nr_stripes)
@@ -3842,6 +3873,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
3842 raid_bio->bi_next = (void*)rdev; 3873 raid_bio->bi_next = (void*)rdev;
3843 align_bi->bi_bdev = rdev->bdev; 3874 align_bi->bi_bdev = rdev->bdev;
3844 align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); 3875 align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
3876 /* No reshape active, so we can trust rdev->data_offset */
3845 align_bi->bi_sector += rdev->data_offset; 3877 align_bi->bi_sector += rdev->data_offset;
3846 3878
3847 if (!bio_fits_rdev(align_bi) || 3879 if (!bio_fits_rdev(align_bi) ||
@@ -5182,9 +5214,12 @@ static int run(struct mddev *mddev)
5182 blk_queue_io_opt(mddev->queue, chunk_size * 5214 blk_queue_io_opt(mddev->queue, chunk_size *
5183 (conf->raid_disks - conf->max_degraded)); 5215 (conf->raid_disks - conf->max_degraded));
5184 5216
5185 rdev_for_each(rdev, mddev) 5217 rdev_for_each(rdev, mddev) {
5186 disk_stack_limits(mddev->gendisk, rdev->bdev, 5218 disk_stack_limits(mddev->gendisk, rdev->bdev,
5187 rdev->data_offset << 9); 5219 rdev->data_offset << 9);
5220 disk_stack_limits(mddev->gendisk, rdev->bdev,
5221 rdev->new_data_offset << 9);
5222 }
5188 } 5223 }
5189 5224
5190 return 0; 5225 return 0;
@@ -5539,12 +5574,16 @@ static int raid5_start_reshape(struct mddev *mddev)
5539 conf->chunk_sectors = mddev->new_chunk_sectors; 5574 conf->chunk_sectors = mddev->new_chunk_sectors;
5540 conf->prev_algo = conf->algorithm; 5575 conf->prev_algo = conf->algorithm;
5541 conf->algorithm = mddev->new_layout; 5576 conf->algorithm = mddev->new_layout;
5577 conf->generation++;
5578 /* Code that selects data_offset needs to see the generation update
5579 * if reshape_progress has been set - so a memory barrier needed.
5580 */
5581 smp_mb();
5542 if (mddev->reshape_backwards) 5582 if (mddev->reshape_backwards)
5543 conf->reshape_progress = raid5_size(mddev, 0, 0); 5583 conf->reshape_progress = raid5_size(mddev, 0, 0);
5544 else 5584 else
5545 conf->reshape_progress = 0; 5585 conf->reshape_progress = 0;
5546 conf->reshape_safe = conf->reshape_progress; 5586 conf->reshape_safe = conf->reshape_progress;
5547 conf->generation++;
5548 spin_unlock_irq(&conf->device_lock); 5587 spin_unlock_irq(&conf->device_lock);
5549 5588
5550 /* Add some new drives, as many as will fit. 5589 /* Add some new drives, as many as will fit.
@@ -5596,6 +5635,9 @@ static int raid5_start_reshape(struct mddev *mddev)
5596 mddev->recovery = 0; 5635 mddev->recovery = 0;
5597 spin_lock_irq(&conf->device_lock); 5636 spin_lock_irq(&conf->device_lock);
5598 mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; 5637 mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
5638 rdev_for_each(rdev, mddev)
5639 rdev->new_data_offset = rdev->data_offset;
5640 smp_wmb();
5599 conf->reshape_progress = MaxSector; 5641 conf->reshape_progress = MaxSector;
5600 mddev->reshape_position = MaxSector; 5642 mddev->reshape_position = MaxSector;
5601 spin_unlock_irq(&conf->device_lock); 5643 spin_unlock_irq(&conf->device_lock);
@@ -5614,9 +5656,13 @@ static void end_reshape(struct r5conf *conf)
5614{ 5656{
5615 5657
5616 if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { 5658 if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
5659 struct md_rdev *rdev;
5617 5660
5618 spin_lock_irq(&conf->device_lock); 5661 spin_lock_irq(&conf->device_lock);
5619 conf->previous_raid_disks = conf->raid_disks; 5662 conf->previous_raid_disks = conf->raid_disks;
5663 rdev_for_each(rdev, conf->mddev)
5664 rdev->data_offset = rdev->new_data_offset;
5665 smp_wmb();
5620 conf->reshape_progress = MaxSector; 5666 conf->reshape_progress = MaxSector;
5621 spin_unlock_irq(&conf->device_lock); 5667 spin_unlock_irq(&conf->device_lock);
5622 wake_up(&conf->wait_for_overlap); 5668 wake_up(&conf->wait_for_overlap);