From a4423287ec16e74c25de8ee3f261b1ea18c0af67 Mon Sep 17 00:00:00 2001
From: Heinz Mauelshagen
Date: Tue, 9 Aug 2016 14:56:14 +0200
Subject: dm raid: fix frozen recovery regression

On LVM2 conversions via lvconvert(8), the target keeps mapped devices in a
frozen state when requesting that RAID devices be resynchronized. This
applies to e.g. adding legs to a raid1 device or taking over from raid0 to
raid4 when the rebuild flag is set on the new raid1 legs or on the added
dedicated parity stripe.

Also fix frozen recovery for reshaping.

Signed-off-by: Heinz Mauelshagen
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-raid.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 1b9795d75ef8..07d17287fa4a 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -191,7 +191,6 @@ struct raid_dev {
 #define RT_FLAG_RS_BITMAP_LOADED	2
 #define RT_FLAG_UPDATE_SBS		3
 #define RT_FLAG_RESHAPE_RS		4
-#define RT_FLAG_KEEP_RS_FROZEN		5

 /* Array elements of 64 bit needed for rebuild/failed disk bits */
 #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -2579,7 +2578,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
 		} else {
 			/* Process raid1 without delta_disks */
 			mddev->raid_disks = rs->raid_disks;
-			set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
 			reshape = false;
 		}
 	} else {
@@ -2590,7 +2588,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
 	if (reshape) {
 		set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags);
 		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-		set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
 	} else if (mddev->raid_disks < rs->raid_disks)
 		/* Create new superblocks and bitmaps, if any new disks */
 		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
@@ -2902,7 +2899,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 			goto bad;

 		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-		set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
 		/* Takeover ain't recovery, so disable recovery */
 		rs_setup_recovery(rs, MaxSector);
 		rs_set_new(rs);
@@ -3624,8 +3620,7 @@ static void raid_resume(struct dm_target *ti)
 		 *    retrieved from the superblock by the ctr because
 		 *    the ongoing recovery/reshape will change it after read.
 		 */
-		if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags))
-			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);

 		if (mddev->suspended)
 			mddev_resume(mddev);
--
cgit v1.2.2
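For context on the fix above: md only starts a resync once MD_RECOVERY_FROZEN is cleared, so a resume path that skips the unfreeze leaves the requested rebuild stuck. The following is a minimal userspace sketch of that interaction, not kernel code; the flag names are borrowed from the patch purely for readability.

#include <stdbool.h>
#include <stdio.h>

/* Toy model only: real md keeps these bits in mddev->recovery. */
enum { MD_RECOVERY_FROZEN_BIT = 0, MD_RECOVERY_NEEDED_BIT = 1 };

struct toy_mddev {
	unsigned long recovery;	/* bitfield of recovery state bits */
};

static bool recovery_may_start(const struct toy_mddev *m)
{
	/* Simplified rule: resync starts only when needed and not frozen. */
	return (m->recovery & (1UL << MD_RECOVERY_NEEDED_BIT)) &&
	       !(m->recovery & (1UL << MD_RECOVERY_FROZEN_BIT));
}

static void toy_raid_resume(struct toy_mddev *m, bool keep_frozen)
{
	/* Old behaviour: skip the unfreeze when "keep frozen" was requested. */
	if (!keep_frozen)
		m->recovery &= ~(1UL << MD_RECOVERY_FROZEN_BIT);
}

int main(void)
{
	struct toy_mddev m = { .recovery = (1UL << MD_RECOVERY_FROZEN_BIT) |
					   (1UL << MD_RECOVERY_NEEDED_BIT) };

	toy_raid_resume(&m, true);	/* old path after an lvconvert takeover */
	printf("old path: resync may start? %d\n", recovery_may_start(&m)); /* 0 */

	toy_raid_resume(&m, false);	/* patched path: always unfreeze */
	printf("new path: resync may start? %d\n", recovery_may_start(&m)); /* 1 */
	return 0;
}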
From 31e10a41203dbc95e0c1e81ef49ad1773a50d4f9 Mon Sep 17 00:00:00 2001
From: Heinz Mauelshagen
Date: Wed, 10 Aug 2016 02:45:59 +0200
Subject: dm raid: fix restoring of failed devices regression

'lvchange --refresh RaidLV' causes a mapped device suspend/resume cycle
aimed at restoring and resynchronizing devices after transient failures.

This failed because the RT_FLAG_RS_RESUMED flag was always cleared in the
suspend path, so the device restore was not performed in the resume path.

Solve this by removing RT_FLAG_RS_RESUMED from the suspend path and
resuming unconditionally.

Also remove a superfluous comment from raid_resume().

Signed-off-by: Heinz Mauelshagen
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-raid.c | 33 +++++++++++----------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 07d17287fa4a..81ec772b1cc9 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3382,11 +3382,10 @@ static void raid_postsuspend(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;

-	if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) {
-		if (!rs->md.suspended)
-			mddev_suspend(&rs->md);
-		rs->md.ro = 1;
-	}
+	if (!rs->md.suspended)
+		mddev_suspend(&rs->md);
+
+	rs->md.ro = 1;
 }

 static void attempt_restore_of_faulty_devices(struct raid_set *rs)
@@ -3606,25 +3605,15 @@ static void raid_resume(struct dm_target *ti)
 		 * devices are reachable again.
 		 */
 		attempt_restore_of_faulty_devices(rs);
-	} else {
-		mddev->ro = 0;
-		mddev->in_sync = 0;
+	}

-		/*
-		 * When passing in flags to the ctr, we expect userspace
-		 * to reset them because they made it to the superblocks
-		 * and reload the mapping anyway.
-		 *
-		 * -> only unfreeze recovery in case of a table reload or
-		 *    we'll have a bogus recovery/reshape position
-		 *    retrieved from the superblock by the ctr because
-		 *    the ongoing recovery/reshape will change it after read.
-		 */
-		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+	mddev->ro = 0;
+	mddev->in_sync = 0;

-		if (mddev->suspended)
-			mddev_resume(mddev);
-	}
+	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+	if (mddev->suspended)
+		mddev_resume(mddev);
 }

 static struct target_type raid_target = {
--
cgit v1.2.2
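As background for the refresh fix above: raid_resume() only calls attempt_restore_of_faulty_devices() on a secondary resume, i.e. when test_and_set_bit() finds RT_FLAG_RS_RESUMED already set. A minimal userspace sketch of that handshake (not the kernel implementation; a boolean stands in for the bit operations) shows why clearing the flag in the suspend path broke the restore:

#include <stdbool.h>
#include <stdio.h>

struct toy_rs {
	bool resumed;		/* stands in for RT_FLAG_RS_RESUMED */
};

/* Returns true when the resume path would call attempt_restore_of_faulty_devices(). */
static bool toy_resume(struct toy_rs *rs)
{
	bool was_resumed = rs->resumed;	/* test_and_set_bit() semantics */

	rs->resumed = true;
	return was_resumed;		/* restore happens only on a secondary resume */
}

static void toy_postsuspend(struct toy_rs *rs, bool old_behaviour)
{
	if (old_behaviour)
		rs->resumed = false;	/* old code: test_and_clear_bit() in suspend */
	/* patched code leaves the flag alone, so a later resume still restores */
}

int main(void)
{
	struct toy_rs rs = { .resumed = false };

	toy_resume(&rs);		/* initial table load + resume */

	/* 'lvchange --refresh RaidLV': suspend, then resume */
	toy_postsuspend(&rs, true);
	printf("old: restore attempted on refresh? %d\n", toy_resume(&rs)); /* 0 */

	toy_postsuspend(&rs, false);
	printf("new: restore attempted on refresh? %d\n", toy_resume(&rs)); /* 1 */
	return 0;
}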
From a3c06a389751192fdcbcdd8bba57bdb856eafe68 Mon Sep 17 00:00:00 2001
From: Heinz Mauelshagen
Date: Tue, 9 Aug 2016 14:55:35 +0200
Subject: dm raid: enhance attempt_restore_of_faulty_devices() to support more devices

attempt_restore_of_faulty_devices() is limited to 64 devices when
identifying failed devices, but it should support the new maximum of 253.
The function clears any revivable devices via an MD personality hot remove
and add cycle to allow for their recovery.

Address this by using the existing functions to retrieve and update the
failed_devices bitfield members in the dm raid superblocks on all RAID
devices, and check them for any devices to clear.

While at it, don't call attempt_restore_of_faulty_devices() for any MD
personality that does not provide disk hot add/remove methods (i.e. raid0
now), because such personalities don't support reviving failed disks.

Signed-off-by: Heinz Mauelshagen
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-raid.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 81ec772b1cc9..b1c251872800 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3391,11 +3391,19 @@ static void raid_postsuspend(struct dm_target *ti)
 static void attempt_restore_of_faulty_devices(struct raid_set *rs)
 {
 	int i;
-	uint64_t failed_devices, cleared_failed_devices = 0;
+	uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS];
 	unsigned long flags;
+	bool cleared = false;
 	struct dm_raid_superblock *sb;
+	struct mddev *mddev = &rs->md;
 	struct md_rdev *r;

+	/* RAID personalities have to provide hot add/remove methods or we need to bail out. */
+	if (!mddev->pers || !mddev->pers->hot_add_disk || !mddev->pers->hot_remove_disk)
+		return;
+
+	memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
+
 	for (i = 0; i < rs->md.raid_disks; i++) {
 		r = &rs->dev[i].rdev;
 		if (test_bit(Faulty, &r->flags) && r->sb_page &&
@@ -3415,7 +3423,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
 			 * ourselves.
 			 */
 			if ((r->raid_disk >= 0) &&
-			    (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0))
+			    (mddev->pers->hot_remove_disk(mddev, r) != 0))
 				/* Failed to revive this device, try next */
 				continue;

@@ -3425,22 +3433,30 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
 			clear_bit(Faulty, &r->flags);
 			clear_bit(WriteErrorSeen, &r->flags);
 			clear_bit(In_sync, &r->flags);
-			if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
+			if (mddev->pers->hot_add_disk(mddev, r)) {
 				r->raid_disk = -1;
 				r->saved_raid_disk = -1;
 				r->flags = flags;
 			} else {
 				r->recovery_offset = 0;
-				cleared_failed_devices |= 1 << i;
+				set_bit(i, (void *) cleared_failed_devices);
+				cleared = true;
 			}
 		}
 	}
-	if (cleared_failed_devices) {
+
+	/* If any failed devices could be cleared, update all sbs failed_devices bits */
+	if (cleared) {
+		uint64_t failed_devices[DISKS_ARRAY_ELEMS];
+
 		rdev_for_each(r, &rs->md) {
 			sb = page_address(r->sb_page);
-			failed_devices = le64_to_cpu(sb->failed_devices);
-			failed_devices &= ~cleared_failed_devices;
-			sb->failed_devices = cpu_to_le64(failed_devices);
+			sb_retrieve_failed_devices(sb, failed_devices);
+
+			for (i = 0; i < DISKS_ARRAY_ELEMS; i++)
+				failed_devices[i] &= ~cleared_failed_devices[i];
+
+			sb_update_failed_devices(sb, failed_devices);
 		}
 	}
 }
--
cgit v1.2.2

From 9e7d9367e6a29e4cac85aa6df199f760dfa39c8a Mon Sep 17 00:00:00 2001
From: Heinz Mauelshagen
Date: Wed, 17 Aug 2016 15:36:44 +0200
Subject: dm raid: support raid0 with missing metadata devices

The raid0 MD personality does not start a raid0 array with any of its data
devices missing.

dm-raid was removing data/metadata device pairs unconditionally if it
failed to read a superblock off the respective metadata device of such a
pair, resulting in a failure to start arrays with the raid0 personality.

Avoid removing any data/metadata device pairs in the case of raid0
(e.g. lvm2 segment type 'raid0_meta'), thus allowing MD to start the array.

Also avoid region size validation for raid0.

Signed-off-by: Heinz Mauelshagen
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-raid.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index b1c251872800..8abde6b8cedc 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -860,6 +860,9 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
 {
 	unsigned long min_region_size = rs->ti->len / (1 << 21);

+	if (rs_is_raid0(rs))
+		return 0;
+
 	if (!region_size) {
 		/*
 		 * Choose a reasonable default. All figures in sectors.
@@ -929,6 +932,8 @@ static int validate_raid_redundancy(struct raid_set *rs)
 			rebuild_cnt++;

 	switch (rs->raid_type->level) {
+	case 0:
+		break;
 	case 1:
 		if (rebuild_cnt >= rs->md.raid_disks)
 			goto too_many;
@@ -2334,6 +2339,13 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
 		case 0:
 			break;
 		default:
+			/*
+			 * We have to keep any raid0 data/metadata device pairs or
+			 * the MD raid0 personality will fail to start the array.
+			 */
+			if (rs_is_raid0(rs))
+				continue;
+
 			dev = container_of(rdev, struct raid_dev, rdev);
 			if (dev->meta_dev)
 				dm_put_device(ti, dev->meta_dev);
--
cgit v1.2.2
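A note on the failed-devices bookkeeping used by the attempt_restore_of_faulty_devices() change above: DISKS_ARRAY_ELEMS rounds the device count up to whole uint64_t words, so 253 devices fit in four words, and a revived device is dropped by masking its bit out of every superblock's failed_devices words. The following is a self-contained userspace sketch of that arithmetic; the set_failed() helper is illustrative, MAX_RAID_DEVICES is assumed to be 253 as the commit message states, and this is not the kernel code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_RAID_DEVICES 253
/* Same arithmetic as the kernel macro: round 253 bits up to whole uint64_t words. */
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)

static void set_failed(uint64_t *bits, unsigned int dev)
{
	/* One bit per device, 64 devices per array element. */
	bits[dev / 64] |= 1ULL << (dev % 64);
}

int main(void)
{
	uint64_t failed_devices[DISKS_ARRAY_ELEMS];
	uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS];
	unsigned int i;

	memset(failed_devices, 0, sizeof(failed_devices));
	memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));

	set_failed(failed_devices, 3);		/* fits in the old single uint64_t */
	set_failed(failed_devices, 130);	/* only representable with the array */
	set_failed(cleared_failed_devices, 130); /* device 130 was revived */

	/* Same masking loop as the patched superblock update. */
	for (i = 0; i < DISKS_ARRAY_ELEMS; i++)
		failed_devices[i] &= ~cleared_failed_devices[i];

	printf("elems=%zu, dev3 still failed=%d, dev130 still failed=%d\n",
	       (size_t)DISKS_ARRAY_ELEMS,
	       !!(failed_devices[0] & (1ULL << 3)),
	       !!(failed_devices[130 / 64] & (1ULL << (130 % 64))));
	return 0;
}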