author      NeilBrown <neilb@suse.de>    2012-05-20 19:27:00 -0400
committer   NeilBrown <neilb@suse.de>    2012-05-20 19:27:00 -0400
commit      c6563a8c38fde3c1c7fc925a10bde3ca20799301 (patch)
tree        3916ae8247149a9dcf39ee1ca262f97be39071eb /drivers/md
parent      2c810cddc44d6f95cef75df3f07fc0850ff92417 (diff)
md: add possibility to change data-offset for devices.
When reshaping we can avoid a costly intermediate backup by
changing the 'start' address of the array on the device
(if there is enough room).
So as a first step, allow such a change to be requested
through sysfs, and recorded in v1.x metadata.
(As we didn't previously check that all 'pad' fields were zero,
we need a new FEATURE flag for this.
We also (belatedly) check that all remaining 'pad' fields are
zero, to avoid a repeat of this.)
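As a rough sketch of how the on-disk recording fits together (condensed from the
super_1_sync()/super_1_load() hunks below): only the signed delta between the new
and old offsets is stored, guarded by the new feature bit. The struct below is a
trimmed stand-in for mdp_superblock_1, the flag values are illustrative rather than
the real md_p.h definitions, and endianness conversions are dropped.

#include <stdint.h>

/* Flag values are illustrative only; the real definitions live in md_p.h. */
#define MD_FEATURE_RESHAPE_ACTIVE  4
#define MD_FEATURE_NEW_OFFSET      64

struct sb1_model {              /* trimmed stand-in for struct mdp_superblock_1 */
	uint32_t feature_map;
	uint64_t data_offset;   /* sectors, as stored on disk */
	int32_t  new_offset;    /* signed delta in sectors */
};

/* sync side: store only the delta, and flag that it is meaningful */
void record_new_offset(struct sb1_model *sb,
		       uint64_t data_offset, uint64_t new_data_offset)
{
	sb->data_offset = data_offset;
	if (new_data_offset != data_offset) {
		sb->feature_map |= MD_FEATURE_NEW_OFFSET;
		sb->new_offset = (int32_t)(new_data_offset - data_offset);
	}
}

/* load side: the delta is only honoured while a reshape is marked active */
uint64_t effective_new_offset(const struct sb1_model *sb)
{
	uint64_t new_data_offset = sb->data_offset;

	if ((sb->feature_map & MD_FEATURE_RESHAPE_ACTIVE) &&
	    (sb->feature_map & MD_FEATURE_NEW_OFFSET))
		new_data_offset += (int32_t)sb->new_offset;
	return new_data_offset;
}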
The new data offset must be requested separately for each device.
This allows each to have a different change in the data offset.
This is not likely to be used often but as data_offset can be
set per-device, new_data_offset should be too.
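For illustration, a per-device request could be driven from userspace as below.
The attribute name 'new_offset' and its sector units come from this patch; the
per-rdev sysfs path layout follows the existing attributes such as 'offset', and
the array name, device names and offsets are made up:

#include <stdio.h>

/* Hypothetical helper: ask md to use a new data offset (in 512-byte
 * sectors) for one component device of md0. */
int request_new_offset(const char *dev, unsigned long long sectors)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/block/md0/md/dev-%s/new_offset", dev);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%llu\n", sectors);
	return fclose(f);
}

int main(void)
{
	/* each device gets its own, possibly different, request */
	request_new_offset("sdb1", 262144);
	request_new_offset("sdc1", 264192);
	return 0;
}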
This patch also removes the 'acknowledged' arg to rdev_set_badblocks as
it is never used and never will be. At the same time we add a new
arg ('is_new') which is currently always zero but will be used
soon.
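A small userspace model of the resulting call shape, using trimmed stand-ins for
struct md_rdev and md_set_badblocks(); this is a sketch of the logic in the hunk
below, not the kernel code itself:

#include <stdio.h>

typedef unsigned long long sector_t;

struct rdev_model {                 /* trimmed stand-in for struct md_rdev */
	sector_t data_offset;
	sector_t new_data_offset;
};

/* stand-in for md_set_badblocks(): just report the translated range */
int record_bad_range(sector_t s, int sectors)
{
	printf("bad range: %llu + %d sectors\n", s, sectors);
	return 1;
}

/* new shape of rdev_set_badblocks(): 'acknowledged' is gone and 'is_new'
 * selects which data offset the device-relative sector is shifted by */
int set_badblocks(struct rdev_model *rdev, sector_t s, int sectors, int is_new)
{
	if (is_new)
		s += rdev->new_data_offset;
	else
		s += rdev->data_offset;
	return record_bad_range(s, sectors);
}

int main(void)
{
	struct rdev_model r = { .data_offset = 2048, .new_data_offset = 4096 };

	set_badblocks(&r, 100, 8, 0);  /* existing callers all pass 0 today */
	set_badblocks(&r, 100, 8, 1);  /* reshape code will pass 1 later */
	return 0;
}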
When a reshape finishes we will need to update the data_offset
and rdev->sectors, so provide an exported function (md_finish_reshape) to do that.
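Condensed from the md_finish_reshape() hunk below (the rdev_for_each() loop,
locking and the real struct md_rdev are simplified away), the per-device step is:
fold the offset change into the device size and make the new offset current.

typedef unsigned long long sector_t;

struct rdev_model {              /* trimmed stand-in for struct md_rdev */
	sector_t data_offset;
	sector_t new_data_offset;
	sector_t sectors;        /* usable data size of this device */
};

/* what md_finish_reshape() does for each rdev once the personality
 * reports that the reshape is complete */
void finish_reshape_one(struct rdev_model *rdev)
{
	if (rdev->data_offset > rdev->new_data_offset)
		rdev->sectors += rdev->data_offset - rdev->new_data_offset;
	else
		rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
	rdev->data_offset = rdev->new_data_offset;
}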
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--   drivers/md/md.c      | 217
-rw-r--r--   drivers/md/md.h      |   7
-rw-r--r--   drivers/md/raid1.c   |   4
-rw-r--r--   drivers/md/raid10.c  |   8
-rw-r--r--   drivers/md/raid5.c   |  10
5 files changed, 214 insertions, 32 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 44bb1d52dd4c..9fa98fc74b05 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1035,12 +1035,17 @@ static unsigned int calc_sb_csum(mdp_super_t * sb) | |||
1035 | struct super_type { | 1035 | struct super_type { |
1036 | char *name; | 1036 | char *name; |
1037 | struct module *owner; | 1037 | struct module *owner; |
1038 | int (*load_super)(struct md_rdev *rdev, struct md_rdev *refdev, | 1038 | int (*load_super)(struct md_rdev *rdev, |
1039 | struct md_rdev *refdev, | ||
1039 | int minor_version); | 1040 | int minor_version); |
1040 | int (*validate_super)(struct mddev *mddev, struct md_rdev *rdev); | 1041 | int (*validate_super)(struct mddev *mddev, |
1041 | void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); | 1042 | struct md_rdev *rdev); |
1043 | void (*sync_super)(struct mddev *mddev, | ||
1044 | struct md_rdev *rdev); | ||
1042 | unsigned long long (*rdev_size_change)(struct md_rdev *rdev, | 1045 | unsigned long long (*rdev_size_change)(struct md_rdev *rdev, |
1043 | sector_t num_sectors); | 1046 | sector_t num_sectors); |
1047 | int (*allow_new_offset)(struct md_rdev *rdev, | ||
1048 | unsigned long long new_offset); | ||
1044 | }; | 1049 | }; |
1045 | 1050 | ||
1046 | /* | 1051 | /* |
@@ -1112,6 +1117,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor | |||
1112 | 1117 | ||
1113 | rdev->preferred_minor = sb->md_minor; | 1118 | rdev->preferred_minor = sb->md_minor; |
1114 | rdev->data_offset = 0; | 1119 | rdev->data_offset = 0; |
1120 | rdev->new_data_offset = 0; | ||
1115 | rdev->sb_size = MD_SB_BYTES; | 1121 | rdev->sb_size = MD_SB_BYTES; |
1116 | rdev->badblocks.shift = -1; | 1122 | rdev->badblocks.shift = -1; |
1117 | 1123 | ||
@@ -1438,6 +1444,12 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) | |||
1438 | return num_sectors; | 1444 | return num_sectors; |
1439 | } | 1445 | } |
1440 | 1446 | ||
1447 | static int | ||
1448 | super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset) | ||
1449 | { | ||
1450 | /* non-zero offset changes not possible with v0.90 */ | ||
1451 | return new_offset == 0; | ||
1452 | } | ||
1441 | 1453 | ||
1442 | /* | 1454 | /* |
1443 | * version 1 superblock | 1455 | * version 1 superblock |
@@ -1473,6 +1485,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ | |||
1473 | struct mdp_superblock_1 *sb; | 1485 | struct mdp_superblock_1 *sb; |
1474 | int ret; | 1486 | int ret; |
1475 | sector_t sb_start; | 1487 | sector_t sb_start; |
1488 | sector_t sectors; | ||
1476 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; | 1489 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; |
1477 | int bmask; | 1490 | int bmask; |
1478 | 1491 | ||
@@ -1527,9 +1540,18 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ | |||
1527 | bdevname(rdev->bdev,b)); | 1540 | bdevname(rdev->bdev,b)); |
1528 | return -EINVAL; | 1541 | return -EINVAL; |
1529 | } | 1542 | } |
1543 | if (sb->pad0 || | ||
1544 | sb->pad3[0] || | ||
1545 | memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1]))) | ||
1546 | /* Some padding is non-zero, might be a new feature */ | ||
1547 | return -EINVAL; | ||
1530 | 1548 | ||
1531 | rdev->preferred_minor = 0xffff; | 1549 | rdev->preferred_minor = 0xffff; |
1532 | rdev->data_offset = le64_to_cpu(sb->data_offset); | 1550 | rdev->data_offset = le64_to_cpu(sb->data_offset); |
1551 | rdev->new_data_offset = rdev->data_offset; | ||
1552 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) && | ||
1553 | (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET)) | ||
1554 | rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset); | ||
1533 | atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); | 1555 | atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); |
1534 | 1556 | ||
1535 | rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; | 1557 | rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; |
@@ -1540,6 +1562,9 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ | |||
1540 | if (minor_version | 1562 | if (minor_version |
1541 | && rdev->data_offset < sb_start + (rdev->sb_size/512)) | 1563 | && rdev->data_offset < sb_start + (rdev->sb_size/512)) |
1542 | return -EINVAL; | 1564 | return -EINVAL; |
1565 | if (minor_version | ||
1566 | && rdev->new_data_offset < sb_start + (rdev->sb_size/512)) | ||
1567 | return -EINVAL; | ||
1543 | 1568 | ||
1544 | if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) | 1569 | if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) |
1545 | rdev->desc_nr = -1; | 1570 | rdev->desc_nr = -1; |
@@ -1611,16 +1636,14 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ | |||
1611 | else | 1636 | else |
1612 | ret = 0; | 1637 | ret = 0; |
1613 | } | 1638 | } |
1614 | if (minor_version) | 1639 | if (minor_version) { |
1615 | rdev->sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) - | 1640 | sectors = (i_size_read(rdev->bdev->bd_inode) >> 9); |
1616 | le64_to_cpu(sb->data_offset); | 1641 | sectors -= rdev->data_offset; |
1617 | else | 1642 | } else |
1618 | rdev->sectors = rdev->sb_start; | 1643 | sectors = rdev->sb_start; |
1619 | if (rdev->sectors < le64_to_cpu(sb->data_size)) | 1644 | if (sectors < le64_to_cpu(sb->data_size)) |
1620 | return -EINVAL; | 1645 | return -EINVAL; |
1621 | rdev->sectors = le64_to_cpu(sb->data_size); | 1646 | rdev->sectors = le64_to_cpu(sb->data_size); |
1622 | if (le64_to_cpu(sb->size) > rdev->sectors) | ||
1623 | return -EINVAL; | ||
1624 | return ret; | 1647 | return ret; |
1625 | } | 1648 | } |
1626 | 1649 | ||
@@ -1745,7 +1768,6 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) | |||
1745 | sb->feature_map = 0; | 1768 | sb->feature_map = 0; |
1746 | sb->pad0 = 0; | 1769 | sb->pad0 = 0; |
1747 | sb->recovery_offset = cpu_to_le64(0); | 1770 | sb->recovery_offset = cpu_to_le64(0); |
1748 | memset(sb->pad1, 0, sizeof(sb->pad1)); | ||
1749 | memset(sb->pad3, 0, sizeof(sb->pad3)); | 1771 | memset(sb->pad3, 0, sizeof(sb->pad3)); |
1750 | 1772 | ||
1751 | sb->utime = cpu_to_le64((__u64)mddev->utime); | 1773 | sb->utime = cpu_to_le64((__u64)mddev->utime); |
@@ -1767,6 +1789,8 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) | |||
1767 | sb->devflags |= WriteMostly1; | 1789 | sb->devflags |= WriteMostly1; |
1768 | else | 1790 | else |
1769 | sb->devflags &= ~WriteMostly1; | 1791 | sb->devflags &= ~WriteMostly1; |
1792 | sb->data_offset = cpu_to_le64(rdev->data_offset); | ||
1793 | sb->data_size = cpu_to_le64(rdev->sectors); | ||
1770 | 1794 | ||
1771 | if (mddev->bitmap && mddev->bitmap_info.file == NULL) { | 1795 | if (mddev->bitmap && mddev->bitmap_info.file == NULL) { |
1772 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset); | 1796 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset); |
@@ -1795,6 +1819,12 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) | |||
1795 | mddev->reshape_backwards) | 1819 | mddev->reshape_backwards) |
1796 | sb->feature_map | 1820 | sb->feature_map |
1797 | |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS); | 1821 | |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS); |
1822 | if (rdev->new_data_offset != rdev->data_offset) { | ||
1823 | sb->feature_map | ||
1824 | |= cpu_to_le32(MD_FEATURE_NEW_OFFSET); | ||
1825 | sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset | ||
1826 | - rdev->data_offset)); | ||
1827 | } | ||
1798 | } | 1828 | } |
1799 | 1829 | ||
1800 | if (rdev->badblocks.count == 0) | 1830 | if (rdev->badblocks.count == 0) |
@@ -1871,6 +1901,8 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) | |||
1871 | sector_t max_sectors; | 1901 | sector_t max_sectors; |
1872 | if (num_sectors && num_sectors < rdev->mddev->dev_sectors) | 1902 | if (num_sectors && num_sectors < rdev->mddev->dev_sectors) |
1873 | return 0; /* component must fit device */ | 1903 | return 0; /* component must fit device */ |
1904 | if (rdev->data_offset != rdev->new_data_offset) | ||
1905 | return 0; /* too confusing */ | ||
1874 | if (rdev->sb_start < rdev->data_offset) { | 1906 | if (rdev->sb_start < rdev->data_offset) { |
1875 | /* minor versions 1 and 2; superblock before data */ | 1907 | /* minor versions 1 and 2; superblock before data */ |
1876 | max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9; | 1908 | max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9; |
@@ -1898,6 +1930,40 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) | |||
1898 | rdev->sb_page); | 1930 | rdev->sb_page); |
1899 | md_super_wait(rdev->mddev); | 1931 | md_super_wait(rdev->mddev); |
1900 | return num_sectors; | 1932 | return num_sectors; |
1933 | |||
1934 | } | ||
1935 | |||
1936 | static int | ||
1937 | super_1_allow_new_offset(struct md_rdev *rdev, | ||
1938 | unsigned long long new_offset) | ||
1939 | { | ||
1940 | /* All necessary checks on new >= old have been done */ | ||
1941 | struct bitmap *bitmap; | ||
1942 | if (new_offset >= rdev->data_offset) | ||
1943 | return 1; | ||
1944 | |||
1945 | /* with 1.0 metadata, there is no metadata to tread on | ||
1946 | * so we can always move back */ | ||
1947 | if (rdev->mddev->minor_version == 0) | ||
1948 | return 1; | ||
1949 | |||
1950 | /* otherwise we must be sure not to step on | ||
1951 | * any metadata, so stay: | ||
1952 | * 36K beyond start of superblock | ||
1953 | * beyond end of badblocks | ||
1954 | * beyond write-intent bitmap | ||
1955 | */ | ||
1956 | if (rdev->sb_start + (32+4)*2 > new_offset) | ||
1957 | return 0; | ||
1958 | bitmap = rdev->mddev->bitmap; | ||
1959 | if (bitmap && !rdev->mddev->bitmap_info.file && | ||
1960 | rdev->sb_start + rdev->mddev->bitmap_info.offset + | ||
1961 | bitmap->file_pages * (PAGE_SIZE>>9) > new_offset) | ||
1962 | return 0; | ||
1963 | if (rdev->badblocks.sector + rdev->badblocks.size > new_offset) | ||
1964 | return 0; | ||
1965 | |||
1966 | return 1; | ||
1901 | } | 1967 | } |
1902 | 1968 | ||
1903 | static struct super_type super_types[] = { | 1969 | static struct super_type super_types[] = { |
@@ -1908,6 +1974,7 @@ static struct super_type super_types[] = { | |||
1908 | .validate_super = super_90_validate, | 1974 | .validate_super = super_90_validate, |
1909 | .sync_super = super_90_sync, | 1975 | .sync_super = super_90_sync, |
1910 | .rdev_size_change = super_90_rdev_size_change, | 1976 | .rdev_size_change = super_90_rdev_size_change, |
1977 | .allow_new_offset = super_90_allow_new_offset, | ||
1911 | }, | 1978 | }, |
1912 | [1] = { | 1979 | [1] = { |
1913 | .name = "md-1", | 1980 | .name = "md-1", |
@@ -1916,6 +1983,7 @@ static struct super_type super_types[] = { | |||
1916 | .validate_super = super_1_validate, | 1983 | .validate_super = super_1_validate, |
1917 | .sync_super = super_1_sync, | 1984 | .sync_super = super_1_sync, |
1918 | .rdev_size_change = super_1_rdev_size_change, | 1985 | .rdev_size_change = super_1_rdev_size_change, |
1986 | .allow_new_offset = super_1_allow_new_offset, | ||
1919 | }, | 1987 | }, |
1920 | }; | 1988 | }; |
1921 | 1989 | ||
@@ -2823,9 +2891,8 @@ offset_show(struct md_rdev *rdev, char *page) | |||
2823 | static ssize_t | 2891 | static ssize_t |
2824 | offset_store(struct md_rdev *rdev, const char *buf, size_t len) | 2892 | offset_store(struct md_rdev *rdev, const char *buf, size_t len) |
2825 | { | 2893 | { |
2826 | char *e; | 2894 | unsigned long long offset; |
2827 | unsigned long long offset = simple_strtoull(buf, &e, 10); | 2895 | if (strict_strtoull(buf, 10, &offset) < 0) |
2828 | if (e==buf || (*e && *e != '\n')) | ||
2829 | return -EINVAL; | 2896 | return -EINVAL; |
2830 | if (rdev->mddev->pers && rdev->raid_disk >= 0) | 2897 | if (rdev->mddev->pers && rdev->raid_disk >= 0) |
2831 | return -EBUSY; | 2898 | return -EBUSY; |
@@ -2840,6 +2907,63 @@ offset_store(struct md_rdev *rdev, const char *buf, size_t len) | |||
2840 | static struct rdev_sysfs_entry rdev_offset = | 2907 | static struct rdev_sysfs_entry rdev_offset = |
2841 | __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store); | 2908 | __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store); |
2842 | 2909 | ||
2910 | static ssize_t new_offset_show(struct md_rdev *rdev, char *page) | ||
2911 | { | ||
2912 | return sprintf(page, "%llu\n", | ||
2913 | (unsigned long long)rdev->new_data_offset); | ||
2914 | } | ||
2915 | |||
2916 | static ssize_t new_offset_store(struct md_rdev *rdev, | ||
2917 | const char *buf, size_t len) | ||
2918 | { | ||
2919 | unsigned long long new_offset; | ||
2920 | struct mddev *mddev = rdev->mddev; | ||
2921 | |||
2922 | if (strict_strtoull(buf, 10, &new_offset) < 0) | ||
2923 | return -EINVAL; | ||
2924 | |||
2925 | if (mddev->sync_thread) | ||
2926 | return -EBUSY; | ||
2927 | if (new_offset == rdev->data_offset) | ||
2928 | /* reset is always permitted */ | ||
2929 | ; | ||
2930 | else if (new_offset > rdev->data_offset) { | ||
2931 | /* must not push array size beyond rdev_sectors */ | ||
2932 | if (new_offset - rdev->data_offset | ||
2933 | + mddev->dev_sectors > rdev->sectors) | ||
2934 | return -E2BIG; | ||
2935 | } | ||
2936 | /* Metadata worries about other space details. */ | ||
2937 | |||
2938 | /* decreasing the offset is inconsistent with a backwards | ||
2939 | * reshape. | ||
2940 | */ | ||
2941 | if (new_offset < rdev->data_offset && | ||
2942 | mddev->reshape_backwards) | ||
2943 | return -EINVAL; | ||
2944 | /* Increasing offset is inconsistent with forwards | ||
2945 | * reshape. reshape_direction should be set to | ||
2946 | * 'backwards' first. | ||
2947 | */ | ||
2948 | if (new_offset > rdev->data_offset && | ||
2949 | !mddev->reshape_backwards) | ||
2950 | return -EINVAL; | ||
2951 | |||
2952 | if (mddev->pers && mddev->persistent && | ||
2953 | !super_types[mddev->major_version] | ||
2954 | .allow_new_offset(rdev, new_offset)) | ||
2955 | return -E2BIG; | ||
2956 | rdev->new_data_offset = new_offset; | ||
2957 | if (new_offset > rdev->data_offset) | ||
2958 | mddev->reshape_backwards = 1; | ||
2959 | else if (new_offset < rdev->data_offset) | ||
2960 | mddev->reshape_backwards = 0; | ||
2961 | |||
2962 | return len; | ||
2963 | } | ||
2964 | static struct rdev_sysfs_entry rdev_new_offset = | ||
2965 | __ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store); | ||
2966 | |||
2843 | static ssize_t | 2967 | static ssize_t |
2844 | rdev_size_show(struct md_rdev *rdev, char *page) | 2968 | rdev_size_show(struct md_rdev *rdev, char *page) |
2845 | { | 2969 | { |
@@ -2884,6 +3008,8 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) | |||
2884 | 3008 | ||
2885 | if (strict_blocks_to_sectors(buf, §ors) < 0) | 3009 | if (strict_blocks_to_sectors(buf, §ors) < 0) |
2886 | return -EINVAL; | 3010 | return -EINVAL; |
3011 | if (rdev->data_offset != rdev->new_data_offset) | ||
3012 | return -EINVAL; /* too confusing */ | ||
2887 | if (my_mddev->pers && rdev->raid_disk >= 0) { | 3013 | if (my_mddev->pers && rdev->raid_disk >= 0) { |
2888 | if (my_mddev->persistent) { | 3014 | if (my_mddev->persistent) { |
2889 | sectors = super_types[my_mddev->major_version]. | 3015 | sectors = super_types[my_mddev->major_version]. |
@@ -3020,6 +3146,7 @@ static struct attribute *rdev_default_attrs[] = { | |||
3020 | &rdev_errors.attr, | 3146 | &rdev_errors.attr, |
3021 | &rdev_slot.attr, | 3147 | &rdev_slot.attr, |
3022 | &rdev_offset.attr, | 3148 | &rdev_offset.attr, |
3149 | &rdev_new_offset.attr, | ||
3023 | &rdev_size.attr, | 3150 | &rdev_size.attr, |
3024 | &rdev_recovery_start.attr, | 3151 | &rdev_recovery_start.attr, |
3025 | &rdev_bad_blocks.attr, | 3152 | &rdev_bad_blocks.attr, |
@@ -3094,6 +3221,7 @@ int md_rdev_init(struct md_rdev *rdev) | |||
3094 | rdev->raid_disk = -1; | 3221 | rdev->raid_disk = -1; |
3095 | rdev->flags = 0; | 3222 | rdev->flags = 0; |
3096 | rdev->data_offset = 0; | 3223 | rdev->data_offset = 0; |
3224 | rdev->new_data_offset = 0; | ||
3097 | rdev->sb_events = 0; | 3225 | rdev->sb_events = 0; |
3098 | rdev->last_read_error.tv_sec = 0; | 3226 | rdev->last_read_error.tv_sec = 0; |
3099 | rdev->last_read_error.tv_nsec = 0; | 3227 | rdev->last_read_error.tv_nsec = 0; |
@@ -3598,7 +3726,17 @@ raid_disks_store(struct mddev *mddev, const char *buf, size_t len) | |||
3598 | if (mddev->pers) | 3726 | if (mddev->pers) |
3599 | rv = update_raid_disks(mddev, n); | 3727 | rv = update_raid_disks(mddev, n); |
3600 | else if (mddev->reshape_position != MaxSector) { | 3728 | else if (mddev->reshape_position != MaxSector) { |
3729 | struct md_rdev *rdev; | ||
3601 | int olddisks = mddev->raid_disks - mddev->delta_disks; | 3730 | int olddisks = mddev->raid_disks - mddev->delta_disks; |
3731 | |||
3732 | rdev_for_each(rdev, mddev) { | ||
3733 | if (olddisks < n && | ||
3734 | rdev->data_offset < rdev->new_data_offset) | ||
3735 | return -EINVAL; | ||
3736 | if (olddisks > n && | ||
3737 | rdev->data_offset > rdev->new_data_offset) | ||
3738 | return -EINVAL; | ||
3739 | } | ||
3602 | mddev->delta_disks = n - olddisks; | 3740 | mddev->delta_disks = n - olddisks; |
3603 | mddev->raid_disks = n; | 3741 | mddev->raid_disks = n; |
3604 | mddev->reshape_backwards = (mddev->delta_disks < 0); | 3742 | mddev->reshape_backwards = (mddev->delta_disks < 0); |
@@ -4445,6 +4583,7 @@ reshape_position_show(struct mddev *mddev, char *page) | |||
4445 | static ssize_t | 4583 | static ssize_t |
4446 | reshape_position_store(struct mddev *mddev, const char *buf, size_t len) | 4584 | reshape_position_store(struct mddev *mddev, const char *buf, size_t len) |
4447 | { | 4585 | { |
4586 | struct md_rdev *rdev; | ||
4448 | char *e; | 4587 | char *e; |
4449 | unsigned long long new = simple_strtoull(buf, &e, 10); | 4588 | unsigned long long new = simple_strtoull(buf, &e, 10); |
4450 | if (mddev->pers) | 4589 | if (mddev->pers) |
@@ -4457,6 +4596,8 @@ reshape_position_store(struct mddev *mddev, const char *buf, size_t len) | |||
4457 | mddev->new_level = mddev->level; | 4596 | mddev->new_level = mddev->level; |
4458 | mddev->new_layout = mddev->layout; | 4597 | mddev->new_layout = mddev->layout; |
4459 | mddev->new_chunk_sectors = mddev->chunk_sectors; | 4598 | mddev->new_chunk_sectors = mddev->chunk_sectors; |
4599 | rdev_for_each(rdev, mddev) | ||
4600 | rdev->new_data_offset = rdev->data_offset; | ||
4460 | return len; | 4601 | return len; |
4461 | } | 4602 | } |
4462 | 4603 | ||
@@ -6001,6 +6142,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) | |||
6001 | static int update_raid_disks(struct mddev *mddev, int raid_disks) | 6142 | static int update_raid_disks(struct mddev *mddev, int raid_disks) |
6002 | { | 6143 | { |
6003 | int rv; | 6144 | int rv; |
6145 | struct md_rdev *rdev; | ||
6004 | /* change the number of raid disks */ | 6146 | /* change the number of raid disks */ |
6005 | if (mddev->pers->check_reshape == NULL) | 6147 | if (mddev->pers->check_reshape == NULL) |
6006 | return -EINVAL; | 6148 | return -EINVAL; |
@@ -6009,6 +6151,16 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) | |||
6009 | return -EINVAL; | 6151 | return -EINVAL; |
6010 | if (mddev->sync_thread || mddev->reshape_position != MaxSector) | 6152 | if (mddev->sync_thread || mddev->reshape_position != MaxSector) |
6011 | return -EBUSY; | 6153 | return -EBUSY; |
6154 | |||
6155 | rdev_for_each(rdev, mddev) { | ||
6156 | if (mddev->raid_disks < raid_disks && | ||
6157 | rdev->data_offset < rdev->new_data_offset) | ||
6158 | return -EINVAL; | ||
6159 | if (mddev->raid_disks > raid_disks && | ||
6160 | rdev->data_offset > rdev->new_data_offset) | ||
6161 | return -EINVAL; | ||
6162 | } | ||
6163 | |||
6012 | mddev->delta_disks = raid_disks - mddev->raid_disks; | 6164 | mddev->delta_disks = raid_disks - mddev->raid_disks; |
6013 | if (mddev->delta_disks < 0) | 6165 | if (mddev->delta_disks < 0) |
6014 | mddev->reshape_backwards = 1; | 6166 | mddev->reshape_backwards = 1; |
@@ -7709,6 +7861,20 @@ void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev) | |||
7709 | } | 7861 | } |
7710 | EXPORT_SYMBOL(md_wait_for_blocked_rdev); | 7862 | EXPORT_SYMBOL(md_wait_for_blocked_rdev); |
7711 | 7863 | ||
7864 | void md_finish_reshape(struct mddev *mddev) | ||
7865 | { | ||
7866 | /* called be personality module when reshape completes. */ | ||
7867 | struct md_rdev *rdev; | ||
7868 | |||
7869 | rdev_for_each(rdev, mddev) { | ||
7870 | if (rdev->data_offset > rdev->new_data_offset) | ||
7871 | rdev->sectors += rdev->data_offset - rdev->new_data_offset; | ||
7872 | else | ||
7873 | rdev->sectors -= rdev->new_data_offset - rdev->data_offset; | ||
7874 | rdev->data_offset = rdev->new_data_offset; | ||
7875 | } | ||
7876 | } | ||
7877 | EXPORT_SYMBOL(md_finish_reshape); | ||
7712 | 7878 | ||
7713 | /* Bad block management. | 7879 | /* Bad block management. |
7714 | * We can record which blocks on each device are 'bad' and so just | 7880 | * We can record which blocks on each device are 'bad' and so just |
@@ -7957,10 +8123,15 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, | |||
7957 | } | 8123 | } |
7958 | 8124 | ||
7959 | int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, | 8125 | int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, |
7960 | int acknowledged) | 8126 | int is_new) |
7961 | { | 8127 | { |
7962 | int rv = md_set_badblocks(&rdev->badblocks, | 8128 | int rv; |
7963 | s + rdev->data_offset, sectors, acknowledged); | 8129 | if (is_new) |
8130 | s += rdev->new_data_offset; | ||
8131 | else | ||
8132 | s += rdev->data_offset; | ||
8133 | rv = md_set_badblocks(&rdev->badblocks, | ||
8134 | s, sectors, 0); | ||
7964 | if (rv) { | 8135 | if (rv) { |
7965 | /* Make sure they get written out promptly */ | 8136 | /* Make sure they get written out promptly */ |
7966 | sysfs_notify_dirent_safe(rdev->sysfs_state); | 8137 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
@@ -8066,11 +8237,15 @@ out: | |||
8066 | return rv; | 8237 | return rv; |
8067 | } | 8238 | } |
8068 | 8239 | ||
8069 | int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors) | 8240 | int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors, |
8241 | int is_new) | ||
8070 | { | 8242 | { |
8243 | if (is_new) | ||
8244 | s += rdev->new_data_offset; | ||
8245 | else | ||
8246 | s += rdev->data_offset; | ||
8071 | return md_clear_badblocks(&rdev->badblocks, | 8247 | return md_clear_badblocks(&rdev->badblocks, |
8072 | s + rdev->data_offset, | 8248 | s, sectors); |
8073 | sectors); | ||
8074 | } | 8249 | } |
8075 | EXPORT_SYMBOL_GPL(rdev_clear_badblocks); | 8250 | EXPORT_SYMBOL_GPL(rdev_clear_badblocks); |
8076 | 8251 | ||
diff --git a/drivers/md/md.h b/drivers/md/md.h
index d51c0ca37777..98913e8dac1a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -55,6 +55,7 @@ struct md_rdev { | |||
55 | int sb_loaded; | 55 | int sb_loaded; |
56 | __u64 sb_events; | 56 | __u64 sb_events; |
57 | sector_t data_offset; /* start of data in array */ | 57 | sector_t data_offset; /* start of data in array */ |
58 | sector_t new_data_offset;/* only relevant while reshaping */ | ||
58 | sector_t sb_start; /* offset of the super block (in 512byte sectors) */ | 59 | sector_t sb_start; /* offset of the super block (in 512byte sectors) */ |
59 | int sb_size; /* bytes in the superblock */ | 60 | int sb_size; /* bytes in the superblock */ |
60 | int preferred_minor; /* autorun support */ | 61 | int preferred_minor; /* autorun support */ |
@@ -193,8 +194,9 @@ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors, | |||
193 | return 0; | 194 | return 0; |
194 | } | 195 | } |
195 | extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, | 196 | extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, |
196 | int acknowledged); | 197 | int is_new); |
197 | extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors); | 198 | extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors, |
199 | int is_new); | ||
198 | extern void md_ack_all_badblocks(struct badblocks *bb); | 200 | extern void md_ack_all_badblocks(struct badblocks *bb); |
199 | 201 | ||
200 | struct mddev { | 202 | struct mddev { |
@@ -592,6 +594,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi); | |||
592 | extern void md_write_end(struct mddev *mddev); | 594 | extern void md_write_end(struct mddev *mddev); |
593 | extern void md_done_sync(struct mddev *mddev, int blocks, int ok); | 595 | extern void md_done_sync(struct mddev *mddev, int blocks, int ok); |
594 | extern void md_error(struct mddev *mddev, struct md_rdev *rdev); | 596 | extern void md_error(struct mddev *mddev, struct md_rdev *rdev); |
597 | extern void md_finish_reshape(struct mddev *mddev); | ||
595 | 598 | ||
596 | extern int mddev_congested(struct mddev *mddev, int bits); | 599 | extern int mddev_congested(struct mddev *mddev, int bits); |
597 | extern void md_flush_request(struct mddev *mddev, struct bio *bio); | 600 | extern void md_flush_request(struct mddev *mddev, struct bio *bio); |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 15dd59b84e94..71a7dc038a82 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2024,7 +2024,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio | |||
2024 | continue; | 2024 | continue; |
2025 | if (test_bit(BIO_UPTODATE, &bio->bi_flags) && | 2025 | if (test_bit(BIO_UPTODATE, &bio->bi_flags) && |
2026 | test_bit(R1BIO_MadeGood, &r1_bio->state)) { | 2026 | test_bit(R1BIO_MadeGood, &r1_bio->state)) { |
2027 | rdev_clear_badblocks(rdev, r1_bio->sector, s); | 2027 | rdev_clear_badblocks(rdev, r1_bio->sector, s, 0); |
2028 | } | 2028 | } |
2029 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && | 2029 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && |
2030 | test_bit(R1BIO_WriteError, &r1_bio->state)) { | 2030 | test_bit(R1BIO_WriteError, &r1_bio->state)) { |
@@ -2044,7 +2044,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) | |||
2044 | struct md_rdev *rdev = conf->mirrors[m].rdev; | 2044 | struct md_rdev *rdev = conf->mirrors[m].rdev; |
2045 | rdev_clear_badblocks(rdev, | 2045 | rdev_clear_badblocks(rdev, |
2046 | r1_bio->sector, | 2046 | r1_bio->sector, |
2047 | r1_bio->sectors); | 2047 | r1_bio->sectors, 0); |
2048 | rdev_dec_pending(rdev, conf->mddev); | 2048 | rdev_dec_pending(rdev, conf->mddev); |
2049 | } else if (r1_bio->bios[m] != NULL) { | 2049 | } else if (r1_bio->bios[m] != NULL) { |
2050 | /* This drive got a write error. We need to | 2050 | /* This drive got a write error. We need to |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3f91c2e1dfe7..832fb4d56657 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2480,7 +2480,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) | |||
2480 | rdev_clear_badblocks( | 2480 | rdev_clear_badblocks( |
2481 | rdev, | 2481 | rdev, |
2482 | r10_bio->devs[m].addr, | 2482 | r10_bio->devs[m].addr, |
2483 | r10_bio->sectors); | 2483 | r10_bio->sectors, 0); |
2484 | } else { | 2484 | } else { |
2485 | if (!rdev_set_badblocks( | 2485 | if (!rdev_set_badblocks( |
2486 | rdev, | 2486 | rdev, |
@@ -2496,7 +2496,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) | |||
2496 | rdev_clear_badblocks( | 2496 | rdev_clear_badblocks( |
2497 | rdev, | 2497 | rdev, |
2498 | r10_bio->devs[m].addr, | 2498 | r10_bio->devs[m].addr, |
2499 | r10_bio->sectors); | 2499 | r10_bio->sectors, 0); |
2500 | } else { | 2500 | } else { |
2501 | if (!rdev_set_badblocks( | 2501 | if (!rdev_set_badblocks( |
2502 | rdev, | 2502 | rdev, |
@@ -2515,7 +2515,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) | |||
2515 | rdev_clear_badblocks( | 2515 | rdev_clear_badblocks( |
2516 | rdev, | 2516 | rdev, |
2517 | r10_bio->devs[m].addr, | 2517 | r10_bio->devs[m].addr, |
2518 | r10_bio->sectors); | 2518 | r10_bio->sectors, 0); |
2519 | rdev_dec_pending(rdev, conf->mddev); | 2519 | rdev_dec_pending(rdev, conf->mddev); |
2520 | } else if (bio != NULL && | 2520 | } else if (bio != NULL && |
2521 | !test_bit(BIO_UPTODATE, &bio->bi_flags)) { | 2521 | !test_bit(BIO_UPTODATE, &bio->bi_flags)) { |
@@ -2532,7 +2532,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) | |||
2532 | rdev_clear_badblocks( | 2532 | rdev_clear_badblocks( |
2533 | rdev, | 2533 | rdev, |
2534 | r10_bio->devs[m].addr, | 2534 | r10_bio->devs[m].addr, |
2535 | r10_bio->sectors); | 2535 | r10_bio->sectors, 0); |
2536 | rdev_dec_pending(rdev, conf->mddev); | 2536 | rdev_dec_pending(rdev, conf->mddev); |
2537 | } | 2537 | } |
2538 | } | 2538 | } |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0abbd3447cfb..3705585d7567 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3561,7 +3561,7 @@ finish: | |||
3561 | if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { | 3561 | if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { |
3562 | rdev = conf->disks[i].rdev; | 3562 | rdev = conf->disks[i].rdev; |
3563 | rdev_clear_badblocks(rdev, sh->sector, | 3563 | rdev_clear_badblocks(rdev, sh->sector, |
3564 | STRIPE_SECTORS); | 3564 | STRIPE_SECTORS, 0); |
3565 | rdev_dec_pending(rdev, conf->mddev); | 3565 | rdev_dec_pending(rdev, conf->mddev); |
3566 | } | 3566 | } |
3567 | if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { | 3567 | if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { |
@@ -3570,7 +3570,7 @@ finish: | |||
3570 | /* rdev have been moved down */ | 3570 | /* rdev have been moved down */ |
3571 | rdev = conf->disks[i].rdev; | 3571 | rdev = conf->disks[i].rdev; |
3572 | rdev_clear_badblocks(rdev, sh->sector, | 3572 | rdev_clear_badblocks(rdev, sh->sector, |
3573 | STRIPE_SECTORS); | 3573 | STRIPE_SECTORS, 0); |
3574 | rdev_dec_pending(rdev, conf->mddev); | 3574 | rdev_dec_pending(rdev, conf->mddev); |
3575 | } | 3575 | } |
3576 | } | 3576 | } |
@@ -5505,10 +5505,14 @@ static int raid5_start_reshape(struct mddev *mddev) | |||
5505 | if (!check_stripe_cache(mddev)) | 5505 | if (!check_stripe_cache(mddev)) |
5506 | return -ENOSPC; | 5506 | return -ENOSPC; |
5507 | 5507 | ||
5508 | rdev_for_each(rdev, mddev) | 5508 | rdev_for_each(rdev, mddev) { |
5509 | /* Don't support changing data_offset yet */ | ||
5510 | if (rdev->new_data_offset != rdev->data_offset) | ||
5511 | return -EINVAL; | ||
5509 | if (!test_bit(In_sync, &rdev->flags) | 5512 | if (!test_bit(In_sync, &rdev->flags) |
5510 | && !test_bit(Faulty, &rdev->flags)) | 5513 | && !test_bit(Faulty, &rdev->flags)) |
5511 | spares++; | 5514 | spares++; |
5515 | } | ||
5512 | 5516 | ||
5513 | if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) | 5517 | if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) |
5514 | /* Not enough devices even to make a degraded array | 5518 | /* Not enough devices even to make a degraded array |