aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2012-05-20 19:27:00 -0400
committer: NeilBrown <neilb@suse.de> 2012-05-20 19:27:00 -0400
commit: c6563a8c38fde3c1c7fc925a10bde3ca20799301 (patch)
tree: 3916ae8247149a9dcf39ee1ca262f97be39071eb /drivers/md
parent: 2c810cddc44d6f95cef75df3f07fc0850ff92417 (diff)
md: add possibility to change data-offset for devices.
When reshaping we can avoid a costly intermediate backup by changing the 'start' address of the array on the device (if there is enough room). So as a first step, allow such a change to be requested through sysfs, and recorded in v1.x metadata. (As we didn't previously check that all 'pad' fields were zero, we need a new FEATURE flag for this. Also (belatedly) check that all remaining 'pad' fields are zero to avoid a repeat of this.) The new data offset must be requested separately for each device. This allows each to have a different change in the data offset. This is not likely to be used often, but as data_offset can be set per-device, new_data_offset should be too. This patch also removes the 'acknowledged' arg to rdev_set_badblocks as it is never used and never will be. At the same time we add a new arg ('is_new') to rdev_set_badblocks and rdev_clear_badblocks, which is currently always zero but will be used more soon. When a reshape finishes we will need to update the data_offset and rdev->sectors. So provide an exported function to do that. Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/md.c217
-rw-r--r--drivers/md/md.h7
-rw-r--r--drivers/md/raid1.c4
-rw-r--r--drivers/md/raid10.c8
-rw-r--r--drivers/md/raid5.c10
5 files changed, 214 insertions, 32 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 44bb1d52dd4c..9fa98fc74b05 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1035,12 +1035,17 @@ static unsigned int calc_sb_csum(mdp_super_t * sb)
1035struct super_type { 1035struct super_type {
1036 char *name; 1036 char *name;
1037 struct module *owner; 1037 struct module *owner;
1038 int (*load_super)(struct md_rdev *rdev, struct md_rdev *refdev, 1038 int (*load_super)(struct md_rdev *rdev,
1039 struct md_rdev *refdev,
1039 int minor_version); 1040 int minor_version);
1040 int (*validate_super)(struct mddev *mddev, struct md_rdev *rdev); 1041 int (*validate_super)(struct mddev *mddev,
1041 void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); 1042 struct md_rdev *rdev);
1043 void (*sync_super)(struct mddev *mddev,
1044 struct md_rdev *rdev);
1042 unsigned long long (*rdev_size_change)(struct md_rdev *rdev, 1045 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
1043 sector_t num_sectors); 1046 sector_t num_sectors);
1047 int (*allow_new_offset)(struct md_rdev *rdev,
1048 unsigned long long new_offset);
1044}; 1049};
1045 1050
1046/* 1051/*
@@ -1112,6 +1117,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
1112 1117
1113 rdev->preferred_minor = sb->md_minor; 1118 rdev->preferred_minor = sb->md_minor;
1114 rdev->data_offset = 0; 1119 rdev->data_offset = 0;
1120 rdev->new_data_offset = 0;
1115 rdev->sb_size = MD_SB_BYTES; 1121 rdev->sb_size = MD_SB_BYTES;
1116 rdev->badblocks.shift = -1; 1122 rdev->badblocks.shift = -1;
1117 1123
@@ -1438,6 +1444,12 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1438 return num_sectors; 1444 return num_sectors;
1439} 1445}
1440 1446
1447static int
1448super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1449{
1450 /* non-zero offset changes not possible with v0.90 */
1451 return new_offset == 0;
1452}
1441 1453
1442/* 1454/*
1443 * version 1 superblock 1455 * version 1 superblock
@@ -1473,6 +1485,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
1473 struct mdp_superblock_1 *sb; 1485 struct mdp_superblock_1 *sb;
1474 int ret; 1486 int ret;
1475 sector_t sb_start; 1487 sector_t sb_start;
1488 sector_t sectors;
1476 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; 1489 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1477 int bmask; 1490 int bmask;
1478 1491
@@ -1527,9 +1540,18 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
1527 bdevname(rdev->bdev,b)); 1540 bdevname(rdev->bdev,b));
1528 return -EINVAL; 1541 return -EINVAL;
1529 } 1542 }
1543 if (sb->pad0 ||
1544 sb->pad3[0] ||
1545 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1546 /* Some padding is non-zero, might be a new feature */
1547 return -EINVAL;
1530 1548
1531 rdev->preferred_minor = 0xffff; 1549 rdev->preferred_minor = 0xffff;
1532 rdev->data_offset = le64_to_cpu(sb->data_offset); 1550 rdev->data_offset = le64_to_cpu(sb->data_offset);
1551 rdev->new_data_offset = rdev->data_offset;
1552 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1553 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1554 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1533 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); 1555 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1534 1556
1535 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; 1557 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
@@ -1540,6 +1562,9 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
1540 if (minor_version 1562 if (minor_version
1541 && rdev->data_offset < sb_start + (rdev->sb_size/512)) 1563 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1542 return -EINVAL; 1564 return -EINVAL;
1565 if (minor_version
1566 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1567 return -EINVAL;
1543 1568
1544 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) 1569 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1545 rdev->desc_nr = -1; 1570 rdev->desc_nr = -1;
@@ -1611,16 +1636,14 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
1611 else 1636 else
1612 ret = 0; 1637 ret = 0;
1613 } 1638 }
1614 if (minor_version) 1639 if (minor_version) {
1615 rdev->sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) - 1640 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1616 le64_to_cpu(sb->data_offset); 1641 sectors -= rdev->data_offset;
1617 else 1642 } else
1618 rdev->sectors = rdev->sb_start; 1643 sectors = rdev->sb_start;
1619 if (rdev->sectors < le64_to_cpu(sb->data_size)) 1644 if (sectors < le64_to_cpu(sb->data_size))
1620 return -EINVAL; 1645 return -EINVAL;
1621 rdev->sectors = le64_to_cpu(sb->data_size); 1646 rdev->sectors = le64_to_cpu(sb->data_size);
1622 if (le64_to_cpu(sb->size) > rdev->sectors)
1623 return -EINVAL;
1624 return ret; 1647 return ret;
1625} 1648}
1626 1649
@@ -1745,7 +1768,6 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1745 sb->feature_map = 0; 1768 sb->feature_map = 0;
1746 sb->pad0 = 0; 1769 sb->pad0 = 0;
1747 sb->recovery_offset = cpu_to_le64(0); 1770 sb->recovery_offset = cpu_to_le64(0);
1748 memset(sb->pad1, 0, sizeof(sb->pad1));
1749 memset(sb->pad3, 0, sizeof(sb->pad3)); 1771 memset(sb->pad3, 0, sizeof(sb->pad3));
1750 1772
1751 sb->utime = cpu_to_le64((__u64)mddev->utime); 1773 sb->utime = cpu_to_le64((__u64)mddev->utime);
@@ -1767,6 +1789,8 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1767 sb->devflags |= WriteMostly1; 1789 sb->devflags |= WriteMostly1;
1768 else 1790 else
1769 sb->devflags &= ~WriteMostly1; 1791 sb->devflags &= ~WriteMostly1;
1792 sb->data_offset = cpu_to_le64(rdev->data_offset);
1793 sb->data_size = cpu_to_le64(rdev->sectors);
1770 1794
1771 if (mddev->bitmap && mddev->bitmap_info.file == NULL) { 1795 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1772 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset); 1796 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
@@ -1795,6 +1819,12 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1795 mddev->reshape_backwards) 1819 mddev->reshape_backwards)
1796 sb->feature_map 1820 sb->feature_map
1797 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS); 1821 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1822 if (rdev->new_data_offset != rdev->data_offset) {
1823 sb->feature_map
1824 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1825 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1826 - rdev->data_offset));
1827 }
1798 } 1828 }
1799 1829
1800 if (rdev->badblocks.count == 0) 1830 if (rdev->badblocks.count == 0)
@@ -1871,6 +1901,8 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1871 sector_t max_sectors; 1901 sector_t max_sectors;
1872 if (num_sectors && num_sectors < rdev->mddev->dev_sectors) 1902 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1873 return 0; /* component must fit device */ 1903 return 0; /* component must fit device */
1904 if (rdev->data_offset != rdev->new_data_offset)
1905 return 0; /* too confusing */
1874 if (rdev->sb_start < rdev->data_offset) { 1906 if (rdev->sb_start < rdev->data_offset) {
1875 /* minor versions 1 and 2; superblock before data */ 1907 /* minor versions 1 and 2; superblock before data */
1876 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9; 1908 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
@@ -1898,6 +1930,40 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1898 rdev->sb_page); 1930 rdev->sb_page);
1899 md_super_wait(rdev->mddev); 1931 md_super_wait(rdev->mddev);
1900 return num_sectors; 1932 return num_sectors;
1933
1934}
1935
1936static int
1937super_1_allow_new_offset(struct md_rdev *rdev,
1938 unsigned long long new_offset)
1939{
1940 /* All necessary checks on new >= old have been done */
1941 struct bitmap *bitmap;
1942 if (new_offset >= rdev->data_offset)
1943 return 1;
1944
1945 /* with 1.0 metadata, there is no metadata to tread on
1946 * so we can always move back */
1947 if (rdev->mddev->minor_version == 0)
1948 return 1;
1949
1950 /* otherwise we must be sure not to step on
1951 * any metadata, so stay:
1952 * 36K beyond start of superblock
1953 * beyond end of badblocks
1954 * beyond write-intent bitmap
1955 */
1956 if (rdev->sb_start + (32+4)*2 > new_offset)
1957 return 0;
1958 bitmap = rdev->mddev->bitmap;
1959 if (bitmap && !rdev->mddev->bitmap_info.file &&
1960 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1961 bitmap->file_pages * (PAGE_SIZE>>9) > new_offset)
1962 return 0;
1963 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1964 return 0;
1965
1966 return 1;
1901} 1967}
1902 1968
1903static struct super_type super_types[] = { 1969static struct super_type super_types[] = {
@@ -1908,6 +1974,7 @@ static struct super_type super_types[] = {
1908 .validate_super = super_90_validate, 1974 .validate_super = super_90_validate,
1909 .sync_super = super_90_sync, 1975 .sync_super = super_90_sync,
1910 .rdev_size_change = super_90_rdev_size_change, 1976 .rdev_size_change = super_90_rdev_size_change,
1977 .allow_new_offset = super_90_allow_new_offset,
1911 }, 1978 },
1912 [1] = { 1979 [1] = {
1913 .name = "md-1", 1980 .name = "md-1",
@@ -1916,6 +1983,7 @@ static struct super_type super_types[] = {
1916 .validate_super = super_1_validate, 1983 .validate_super = super_1_validate,
1917 .sync_super = super_1_sync, 1984 .sync_super = super_1_sync,
1918 .rdev_size_change = super_1_rdev_size_change, 1985 .rdev_size_change = super_1_rdev_size_change,
1986 .allow_new_offset = super_1_allow_new_offset,
1919 }, 1987 },
1920}; 1988};
1921 1989
@@ -2823,9 +2891,8 @@ offset_show(struct md_rdev *rdev, char *page)
2823static ssize_t 2891static ssize_t
2824offset_store(struct md_rdev *rdev, const char *buf, size_t len) 2892offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2825{ 2893{
2826 char *e; 2894 unsigned long long offset;
2827 unsigned long long offset = simple_strtoull(buf, &e, 10); 2895 if (strict_strtoull(buf, 10, &offset) < 0)
2828 if (e==buf || (*e && *e != '\n'))
2829 return -EINVAL; 2896 return -EINVAL;
2830 if (rdev->mddev->pers && rdev->raid_disk >= 0) 2897 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2831 return -EBUSY; 2898 return -EBUSY;
@@ -2840,6 +2907,63 @@ offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2840static struct rdev_sysfs_entry rdev_offset = 2907static struct rdev_sysfs_entry rdev_offset =
2841__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store); 2908__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2842 2909
2910static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2911{
2912 return sprintf(page, "%llu\n",
2913 (unsigned long long)rdev->new_data_offset);
2914}
2915
2916static ssize_t new_offset_store(struct md_rdev *rdev,
2917 const char *buf, size_t len)
2918{
2919 unsigned long long new_offset;
2920 struct mddev *mddev = rdev->mddev;
2921
2922 if (strict_strtoull(buf, 10, &new_offset) < 0)
2923 return -EINVAL;
2924
2925 if (mddev->sync_thread)
2926 return -EBUSY;
2927 if (new_offset == rdev->data_offset)
2928 /* reset is always permitted */
2929 ;
2930 else if (new_offset > rdev->data_offset) {
2931 /* must not push array size beyond rdev_sectors */
2932 if (new_offset - rdev->data_offset
2933 + mddev->dev_sectors > rdev->sectors)
2934 return -E2BIG;
2935 }
2936 /* Metadata worries about other space details. */
2937
2938 /* decreasing the offset is inconsistent with a backwards
2939 * reshape.
2940 */
2941 if (new_offset < rdev->data_offset &&
2942 mddev->reshape_backwards)
2943 return -EINVAL;
2944 /* Increasing offset is inconsistent with forwards
2945 * reshape. reshape_direction should be set to
2946 * 'backwards' first.
2947 */
2948 if (new_offset > rdev->data_offset &&
2949 !mddev->reshape_backwards)
2950 return -EINVAL;
2951
2952 if (mddev->pers && mddev->persistent &&
2953 !super_types[mddev->major_version]
2954 .allow_new_offset(rdev, new_offset))
2955 return -E2BIG;
2956 rdev->new_data_offset = new_offset;
2957 if (new_offset > rdev->data_offset)
2958 mddev->reshape_backwards = 1;
2959 else if (new_offset < rdev->data_offset)
2960 mddev->reshape_backwards = 0;
2961
2962 return len;
2963}
2964static struct rdev_sysfs_entry rdev_new_offset =
2965__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2966
2843static ssize_t 2967static ssize_t
2844rdev_size_show(struct md_rdev *rdev, char *page) 2968rdev_size_show(struct md_rdev *rdev, char *page)
2845{ 2969{
@@ -2884,6 +3008,8 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2884 3008
2885 if (strict_blocks_to_sectors(buf, &sectors) < 0) 3009 if (strict_blocks_to_sectors(buf, &sectors) < 0)
2886 return -EINVAL; 3010 return -EINVAL;
3011 if (rdev->data_offset != rdev->new_data_offset)
3012 return -EINVAL; /* too confusing */
2887 if (my_mddev->pers && rdev->raid_disk >= 0) { 3013 if (my_mddev->pers && rdev->raid_disk >= 0) {
2888 if (my_mddev->persistent) { 3014 if (my_mddev->persistent) {
2889 sectors = super_types[my_mddev->major_version]. 3015 sectors = super_types[my_mddev->major_version].
@@ -3020,6 +3146,7 @@ static struct attribute *rdev_default_attrs[] = {
3020 &rdev_errors.attr, 3146 &rdev_errors.attr,
3021 &rdev_slot.attr, 3147 &rdev_slot.attr,
3022 &rdev_offset.attr, 3148 &rdev_offset.attr,
3149 &rdev_new_offset.attr,
3023 &rdev_size.attr, 3150 &rdev_size.attr,
3024 &rdev_recovery_start.attr, 3151 &rdev_recovery_start.attr,
3025 &rdev_bad_blocks.attr, 3152 &rdev_bad_blocks.attr,
@@ -3094,6 +3221,7 @@ int md_rdev_init(struct md_rdev *rdev)
3094 rdev->raid_disk = -1; 3221 rdev->raid_disk = -1;
3095 rdev->flags = 0; 3222 rdev->flags = 0;
3096 rdev->data_offset = 0; 3223 rdev->data_offset = 0;
3224 rdev->new_data_offset = 0;
3097 rdev->sb_events = 0; 3225 rdev->sb_events = 0;
3098 rdev->last_read_error.tv_sec = 0; 3226 rdev->last_read_error.tv_sec = 0;
3099 rdev->last_read_error.tv_nsec = 0; 3227 rdev->last_read_error.tv_nsec = 0;
@@ -3598,7 +3726,17 @@ raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3598 if (mddev->pers) 3726 if (mddev->pers)
3599 rv = update_raid_disks(mddev, n); 3727 rv = update_raid_disks(mddev, n);
3600 else if (mddev->reshape_position != MaxSector) { 3728 else if (mddev->reshape_position != MaxSector) {
3729 struct md_rdev *rdev;
3601 int olddisks = mddev->raid_disks - mddev->delta_disks; 3730 int olddisks = mddev->raid_disks - mddev->delta_disks;
3731
3732 rdev_for_each(rdev, mddev) {
3733 if (olddisks < n &&
3734 rdev->data_offset < rdev->new_data_offset)
3735 return -EINVAL;
3736 if (olddisks > n &&
3737 rdev->data_offset > rdev->new_data_offset)
3738 return -EINVAL;
3739 }
3602 mddev->delta_disks = n - olddisks; 3740 mddev->delta_disks = n - olddisks;
3603 mddev->raid_disks = n; 3741 mddev->raid_disks = n;
3604 mddev->reshape_backwards = (mddev->delta_disks < 0); 3742 mddev->reshape_backwards = (mddev->delta_disks < 0);
@@ -4445,6 +4583,7 @@ reshape_position_show(struct mddev *mddev, char *page)
4445static ssize_t 4583static ssize_t
4446reshape_position_store(struct mddev *mddev, const char *buf, size_t len) 4584reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4447{ 4585{
4586 struct md_rdev *rdev;
4448 char *e; 4587 char *e;
4449 unsigned long long new = simple_strtoull(buf, &e, 10); 4588 unsigned long long new = simple_strtoull(buf, &e, 10);
4450 if (mddev->pers) 4589 if (mddev->pers)
@@ -4457,6 +4596,8 @@ reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4457 mddev->new_level = mddev->level; 4596 mddev->new_level = mddev->level;
4458 mddev->new_layout = mddev->layout; 4597 mddev->new_layout = mddev->layout;
4459 mddev->new_chunk_sectors = mddev->chunk_sectors; 4598 mddev->new_chunk_sectors = mddev->chunk_sectors;
4599 rdev_for_each(rdev, mddev)
4600 rdev->new_data_offset = rdev->data_offset;
4460 return len; 4601 return len;
4461} 4602}
4462 4603
@@ -6001,6 +6142,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
6001static int update_raid_disks(struct mddev *mddev, int raid_disks) 6142static int update_raid_disks(struct mddev *mddev, int raid_disks)
6002{ 6143{
6003 int rv; 6144 int rv;
6145 struct md_rdev *rdev;
6004 /* change the number of raid disks */ 6146 /* change the number of raid disks */
6005 if (mddev->pers->check_reshape == NULL) 6147 if (mddev->pers->check_reshape == NULL)
6006 return -EINVAL; 6148 return -EINVAL;
@@ -6009,6 +6151,16 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
6009 return -EINVAL; 6151 return -EINVAL;
6010 if (mddev->sync_thread || mddev->reshape_position != MaxSector) 6152 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
6011 return -EBUSY; 6153 return -EBUSY;
6154
6155 rdev_for_each(rdev, mddev) {
6156 if (mddev->raid_disks < raid_disks &&
6157 rdev->data_offset < rdev->new_data_offset)
6158 return -EINVAL;
6159 if (mddev->raid_disks > raid_disks &&
6160 rdev->data_offset > rdev->new_data_offset)
6161 return -EINVAL;
6162 }
6163
6012 mddev->delta_disks = raid_disks - mddev->raid_disks; 6164 mddev->delta_disks = raid_disks - mddev->raid_disks;
6013 if (mddev->delta_disks < 0) 6165 if (mddev->delta_disks < 0)
6014 mddev->reshape_backwards = 1; 6166 mddev->reshape_backwards = 1;
@@ -7709,6 +7861,20 @@ void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
7709} 7861}
7710EXPORT_SYMBOL(md_wait_for_blocked_rdev); 7862EXPORT_SYMBOL(md_wait_for_blocked_rdev);
7711 7863
7864void md_finish_reshape(struct mddev *mddev)
7865{
7866 /* called be personality module when reshape completes. */
7867 struct md_rdev *rdev;
7868
7869 rdev_for_each(rdev, mddev) {
7870 if (rdev->data_offset > rdev->new_data_offset)
7871 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
7872 else
7873 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
7874 rdev->data_offset = rdev->new_data_offset;
7875 }
7876}
7877EXPORT_SYMBOL(md_finish_reshape);
7712 7878
7713/* Bad block management. 7879/* Bad block management.
7714 * We can record which blocks on each device are 'bad' and so just 7880 * We can record which blocks on each device are 'bad' and so just
@@ -7957,10 +8123,15 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
7957} 8123}
7958 8124
7959int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, 8125int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
7960 int acknowledged) 8126 int is_new)
7961{ 8127{
7962 int rv = md_set_badblocks(&rdev->badblocks, 8128 int rv;
7963 s + rdev->data_offset, sectors, acknowledged); 8129 if (is_new)
8130 s += rdev->new_data_offset;
8131 else
8132 s += rdev->data_offset;
8133 rv = md_set_badblocks(&rdev->badblocks,
8134 s, sectors, 0);
7964 if (rv) { 8135 if (rv) {
7965 /* Make sure they get written out promptly */ 8136 /* Make sure they get written out promptly */
7966 sysfs_notify_dirent_safe(rdev->sysfs_state); 8137 sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -8066,11 +8237,15 @@ out:
8066 return rv; 8237 return rv;
8067} 8238}
8068 8239
8069int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors) 8240int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8241 int is_new)
8070{ 8242{
8243 if (is_new)
8244 s += rdev->new_data_offset;
8245 else
8246 s += rdev->data_offset;
8071 return md_clear_badblocks(&rdev->badblocks, 8247 return md_clear_badblocks(&rdev->badblocks,
8072 s + rdev->data_offset, 8248 s, sectors);
8073 sectors);
8074} 8249}
8075EXPORT_SYMBOL_GPL(rdev_clear_badblocks); 8250EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8076 8251
diff --git a/drivers/md/md.h b/drivers/md/md.h
index d51c0ca37777..98913e8dac1a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -55,6 +55,7 @@ struct md_rdev {
55 int sb_loaded; 55 int sb_loaded;
56 __u64 sb_events; 56 __u64 sb_events;
57 sector_t data_offset; /* start of data in array */ 57 sector_t data_offset; /* start of data in array */
58 sector_t new_data_offset;/* only relevant while reshaping */
58 sector_t sb_start; /* offset of the super block (in 512byte sectors) */ 59 sector_t sb_start; /* offset of the super block (in 512byte sectors) */
59 int sb_size; /* bytes in the superblock */ 60 int sb_size; /* bytes in the superblock */
60 int preferred_minor; /* autorun support */ 61 int preferred_minor; /* autorun support */
@@ -193,8 +194,9 @@ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
193 return 0; 194 return 0;
194} 195}
195extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, 196extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
196 int acknowledged); 197 int is_new);
197extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors); 198extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
199 int is_new);
198extern void md_ack_all_badblocks(struct badblocks *bb); 200extern void md_ack_all_badblocks(struct badblocks *bb);
199 201
200struct mddev { 202struct mddev {
@@ -592,6 +594,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi);
592extern void md_write_end(struct mddev *mddev); 594extern void md_write_end(struct mddev *mddev);
593extern void md_done_sync(struct mddev *mddev, int blocks, int ok); 595extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
594extern void md_error(struct mddev *mddev, struct md_rdev *rdev); 596extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
597extern void md_finish_reshape(struct mddev *mddev);
595 598
596extern int mddev_congested(struct mddev *mddev, int bits); 599extern int mddev_congested(struct mddev *mddev, int bits);
597extern void md_flush_request(struct mddev *mddev, struct bio *bio); 600extern void md_flush_request(struct mddev *mddev, struct bio *bio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 15dd59b84e94..71a7dc038a82 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2024,7 +2024,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
2024 continue; 2024 continue;
2025 if (test_bit(BIO_UPTODATE, &bio->bi_flags) && 2025 if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
2026 test_bit(R1BIO_MadeGood, &r1_bio->state)) { 2026 test_bit(R1BIO_MadeGood, &r1_bio->state)) {
2027 rdev_clear_badblocks(rdev, r1_bio->sector, s); 2027 rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
2028 } 2028 }
2029 if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && 2029 if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
2030 test_bit(R1BIO_WriteError, &r1_bio->state)) { 2030 test_bit(R1BIO_WriteError, &r1_bio->state)) {
@@ -2044,7 +2044,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
2044 struct md_rdev *rdev = conf->mirrors[m].rdev; 2044 struct md_rdev *rdev = conf->mirrors[m].rdev;
2045 rdev_clear_badblocks(rdev, 2045 rdev_clear_badblocks(rdev,
2046 r1_bio->sector, 2046 r1_bio->sector,
2047 r1_bio->sectors); 2047 r1_bio->sectors, 0);
2048 rdev_dec_pending(rdev, conf->mddev); 2048 rdev_dec_pending(rdev, conf->mddev);
2049 } else if (r1_bio->bios[m] != NULL) { 2049 } else if (r1_bio->bios[m] != NULL) {
2050 /* This drive got a write error. We need to 2050 /* This drive got a write error. We need to
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3f91c2e1dfe7..832fb4d56657 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2480,7 +2480,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2480 rdev_clear_badblocks( 2480 rdev_clear_badblocks(
2481 rdev, 2481 rdev,
2482 r10_bio->devs[m].addr, 2482 r10_bio->devs[m].addr,
2483 r10_bio->sectors); 2483 r10_bio->sectors, 0);
2484 } else { 2484 } else {
2485 if (!rdev_set_badblocks( 2485 if (!rdev_set_badblocks(
2486 rdev, 2486 rdev,
@@ -2496,7 +2496,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2496 rdev_clear_badblocks( 2496 rdev_clear_badblocks(
2497 rdev, 2497 rdev,
2498 r10_bio->devs[m].addr, 2498 r10_bio->devs[m].addr,
2499 r10_bio->sectors); 2499 r10_bio->sectors, 0);
2500 } else { 2500 } else {
2501 if (!rdev_set_badblocks( 2501 if (!rdev_set_badblocks(
2502 rdev, 2502 rdev,
@@ -2515,7 +2515,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2515 rdev_clear_badblocks( 2515 rdev_clear_badblocks(
2516 rdev, 2516 rdev,
2517 r10_bio->devs[m].addr, 2517 r10_bio->devs[m].addr,
2518 r10_bio->sectors); 2518 r10_bio->sectors, 0);
2519 rdev_dec_pending(rdev, conf->mddev); 2519 rdev_dec_pending(rdev, conf->mddev);
2520 } else if (bio != NULL && 2520 } else if (bio != NULL &&
2521 !test_bit(BIO_UPTODATE, &bio->bi_flags)) { 2521 !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
@@ -2532,7 +2532,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2532 rdev_clear_badblocks( 2532 rdev_clear_badblocks(
2533 rdev, 2533 rdev,
2534 r10_bio->devs[m].addr, 2534 r10_bio->devs[m].addr,
2535 r10_bio->sectors); 2535 r10_bio->sectors, 0);
2536 rdev_dec_pending(rdev, conf->mddev); 2536 rdev_dec_pending(rdev, conf->mddev);
2537 } 2537 }
2538 } 2538 }
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0abbd3447cfb..3705585d7567 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3561,7 +3561,7 @@ finish:
3561 if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { 3561 if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
3562 rdev = conf->disks[i].rdev; 3562 rdev = conf->disks[i].rdev;
3563 rdev_clear_badblocks(rdev, sh->sector, 3563 rdev_clear_badblocks(rdev, sh->sector,
3564 STRIPE_SECTORS); 3564 STRIPE_SECTORS, 0);
3565 rdev_dec_pending(rdev, conf->mddev); 3565 rdev_dec_pending(rdev, conf->mddev);
3566 } 3566 }
3567 if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { 3567 if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
@@ -3570,7 +3570,7 @@ finish:
3570 /* rdev have been moved down */ 3570 /* rdev have been moved down */
3571 rdev = conf->disks[i].rdev; 3571 rdev = conf->disks[i].rdev;
3572 rdev_clear_badblocks(rdev, sh->sector, 3572 rdev_clear_badblocks(rdev, sh->sector,
3573 STRIPE_SECTORS); 3573 STRIPE_SECTORS, 0);
3574 rdev_dec_pending(rdev, conf->mddev); 3574 rdev_dec_pending(rdev, conf->mddev);
3575 } 3575 }
3576 } 3576 }
@@ -5505,10 +5505,14 @@ static int raid5_start_reshape(struct mddev *mddev)
5505 if (!check_stripe_cache(mddev)) 5505 if (!check_stripe_cache(mddev))
5506 return -ENOSPC; 5506 return -ENOSPC;
5507 5507
5508 rdev_for_each(rdev, mddev) 5508 rdev_for_each(rdev, mddev) {
5509 /* Don't support changing data_offset yet */
5510 if (rdev->new_data_offset != rdev->data_offset)
5511 return -EINVAL;
5509 if (!test_bit(In_sync, &rdev->flags) 5512 if (!test_bit(In_sync, &rdev->flags)
5510 && !test_bit(Faulty, &rdev->flags)) 5513 && !test_bit(Faulty, &rdev->flags))
5511 spares++; 5514 spares++;
5515 }
5512 5516
5513 if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) 5517 if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
5514 /* Not enough devices even to make a degraded array 5518 /* Not enough devices even to make a degraded array