From 5ef56c8fecedf403a346d02140e52a072d693d6b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Aug 2011 14:42:51 +1000 Subject: md: report failure if a 'set faulty' request doesn't. Sometimes a device will refuse to be set faulty. e.g. RAID1 will never let the last working device become faulty. So check if "md_error()" did manage to set the faulty flag and fail with EBUSY if it didn't. Resolves-Debian-Bug: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=601198 Reported-by: Mike Hommey Signed-off-by: NeilBrown --- drivers/md/md.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 8e221a20f5d9..1cd9bfb45e9a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2561,7 +2561,10 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) int err = -EINVAL; if (cmd_match(buf, "faulty") && rdev->mddev->pers) { md_error(rdev->mddev, rdev); - err = 0; + if (test_bit(Faulty, &rdev->flags)) + err = 0; + else + err = -EBUSY; } else if (cmd_match(buf, "remove")) { if (rdev->raid_disk >= 0) err = -EBUSY; @@ -5983,6 +5986,8 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev) return -ENODEV; md_error(mddev, rdev); + if (!test_bit(Faulty, &rdev->flags)) + return -EBUSY; return 0; } -- cgit v1.2.2 From aeb9b211849621f592288ed5ad694de9eeaae87a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Aug 2011 14:43:08 +1000 Subject: md: ensure changes to 'write-mostly' are reflected in metadata. The 'write-mostly' flag can be changed through sysfs. With 0.90 metadata, those changes are reflected in the metadata. For 1.x metadata, they aren't. So fix super_1_sync to record 'write-mostly' status. Signed-off-by: NeilBrown --- drivers/md/md.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 1cd9bfb45e9a..9a880239219d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1738,6 +1738,11 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); + if (test_bit(WriteMostly, &rdev->flags)) + sb->devflags |= WriteMostly1; + else + sb->devflags &= ~WriteMostly1; + if (mddev->bitmap && mddev->bitmap_info.file == NULL) { sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset); sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); -- cgit v1.2.2 From a5bf4df0c88b88d34b6f0e3bc8a402dac7d14611 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Aug 2011 14:43:34 +1000 Subject: md: use REQ_NOIDLE flag in md_super_write() Queue idling is used for the anticipation of immediate sequencial I/O's but md_super_write() is a kind of one- shot operation, coupled with md_super_wait(), so the idling in this case will be just a waste of time. Specifying REQ_NOIDLE prevents it. Instead of adding the flag to submit_bio() directly, use pre-defined macro WRITE_FLUSH_FUA. Signed-off-by: Namhyung Kim Signed-off-by: NeilBrown --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 9a880239219d..aca611711264 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -848,7 +848,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, bio->bi_end_io = super_written; atomic_inc(&mddev->pending_writes); - submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio); + submit_bio(WRITE_FLUSH_FUA, bio); } void md_super_wait(mddev_t *mddev) -- cgit v1.2.2 From 7da64a0abc3b2c6cbd3521672e9bb74dd560bb89 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 30 Aug 2011 16:20:17 +1000 Subject: md: fix clearing of 'blocked' flag in the presence of bad blocks. When the 'blocked' flag on a device is cleared while there are unacknowledged bad blocks we must fail the device. This is needed for backwards compatability of the interface. The code currently uses the wrong test for "unacknowledged bad blocks exist". Change it to the right test. Signed-off-by: NeilBrown --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index aca611711264..3742ce8b0acf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2592,7 +2592,7 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) err = 0; } else if (cmd_match(buf, "-blocked")) { if (!test_bit(Faulty, &rdev->flags) && - test_bit(BlockedBadBlocks, &rdev->flags)) { + rdev->badblocks.unacked_exist) { /* metadata handler doesn't understand badblocks, * so we need to fail the device */ -- cgit v1.2.2 From 27a7b260f71439c40546b43588448faac01adb93 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 10 Sep 2011 17:21:28 +1000 Subject: md: Fix handling for devices from 2TB to 4TB in 0.90 metadata. 0.90 metadata uses an unsigned 32bit number to count the number of kilobytes used from each device. This should allow up to 4TB per device. However we multiply this by 2 (to get sectors) before casting to a larger type, so sizes above 2TB get truncated. Also we allow rdev->sectors to be larger than 4TB, so it is possible for the array to be resized larger than the metadata can handle. So make sure rdev->sectors never exceeds 4TB when 0.90 metadata is in used. Also the sanity check at the end of super_90_load should include level 1 as it used ->size too. (RAID0 and Linear don't use ->size at all). Reported-by: Pim Zandbergen Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/md.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 3742ce8b0acf..5404b2295820 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1138,8 +1138,11 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version ret = 0; } rdev->sectors = rdev->sb_start; + /* Limit to 4TB as metadata cannot record more than that */ + if (rdev->sectors >= (2ULL << 32)) + rdev->sectors = (2ULL << 32) - 2; - if (rdev->sectors < sb->size * 2 && sb->level > 1) + if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1) /* "this cannot possibly happen" ... */ ret = -EINVAL; @@ -1173,7 +1176,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->clevel[0] = 0; mddev->layout = sb->layout; mddev->raid_disks = sb->raid_disks; - mddev->dev_sectors = sb->size * 2; + mddev->dev_sectors = ((sector_t)sb->size) * 2; mddev->events = ev1; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; @@ -1415,6 +1418,11 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) rdev->sb_start = calc_dev_sboffset(rdev); if (!num_sectors || num_sectors > rdev->sb_start) num_sectors = rdev->sb_start; + /* Limit to 4TB as metadata cannot record more than that. + * 4TB == 2^32 KB, or 2*2^32 sectors. + */ + if (num_sectors >= (2ULL << 32)) + num_sectors = (2ULL << 32) - 2; md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, rdev->sb_page); md_super_wait(rdev->mddev); -- cgit v1.2.2