From bd52b746262c8d77e73903d6608014fb2fcdcd9d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 30 Aug 2010 17:33:33 +1000 Subject: md: don't clear MD_CHANGE_CLEAN in md_update_sb() for external arrays If this bit is cleared in md_update_sb() the kernel will allow writes to the array if userspace triggers md_allow_write(), e.g. through stripe_cache_size, when mdmon is not active. When mdmon is active the array transitions to active-idle bypassing write-pending, setting up a race for mdmon to set the array clean before a write arrives. Signed-off-by: Dan Williams Signed-off-by: NeilBrown --- drivers/md/md.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index c148b6302154..a1f6b59b8b37 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2167,9 +2167,10 @@ repeat: rdev->recovery_offset = mddev->curr_resync_completed; } - if (mddev->external || !mddev->persistent) { + if (!mddev->persistent) { + if (!mddev->external) + clear_bit(MD_CHANGE_CLEAN, &mddev->flags); clear_bit(MD_CHANGE_DEVS, &mddev->flags); - clear_bit(MD_CHANGE_CLEAN, &mddev->flags); wake_up(&mddev->sb_wait); return; } -- cgit v1.2.2 From 070dc6dd7103b6b3f7e4d46e754354a5c15f366e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 30 Aug 2010 17:33:34 +1000 Subject: md: resolve confusion of MD_CHANGE_CLEAN MD_CHANGE_CLEAN is used for two different purposes and this leads to confusion. One of the purposes is largely mirrored by MD_CHANGE_PENDING which is not used for anything else, so have MD_CHANGE_PENDING take over that purpose fully. The two purposes are: 1/ tell md_update_sb that an update is needed and that it is just a clean/dirty transition. 2/ tell user-space that an transition from clean to dirty is pending (something wants to write), and tell te kernel (by clearin the flag) that the transition is OK. The first purpose remains wit MD_CHANGE_CLEAN, the second is moved fully to MD_CHANGE_PENDING. This means that various places which conditionally set or cleared MD_CHANGE_CLEAN no longer need to be conditional. Signed-off-by: NeilBrown --- drivers/md/md.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index a1f6b59b8b37..43cf9cc9c1df 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2168,8 +2168,7 @@ repeat: } if (!mddev->persistent) { - if (!mddev->external) - clear_bit(MD_CHANGE_CLEAN, &mddev->flags); + clear_bit(MD_CHANGE_CLEAN, &mddev->flags); clear_bit(MD_CHANGE_DEVS, &mddev->flags); wake_up(&mddev->sb_wait); return; @@ -2179,7 +2178,6 @@ repeat: mddev->utime = get_seconds(); - set_bit(MD_CHANGE_PENDING, &mddev->flags); if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags)) force_change = 1; if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags)) @@ -3372,7 +3370,7 @@ array_state_show(mddev_t *mddev, char *page) case 0: if (mddev->in_sync) st = clean; - else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags)) + else if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) st = write_pending; else if (mddev->safemode) st = active_idle; @@ -3453,9 +3451,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) mddev->in_sync = 1; if (mddev->safemode == 1) mddev->safemode = 0; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, - &mddev->flags); + set_bit(MD_CHANGE_CLEAN, &mddev->flags); } err = 0; } else @@ -3467,8 +3463,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) case active: if (mddev->pers) { restart_array(mddev); - if (mddev->external) - clear_bit(MD_CHANGE_CLEAN, &mddev->flags); + clear_bit(MD_CHANGE_PENDING, &mddev->flags); wake_up(&mddev->sb_wait); err = 0; } else { @@ -6573,6 +6568,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) if (mddev->in_sync) { mddev->in_sync = 0; set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_PENDING, &mddev->flags); md_wakeup_thread(mddev->thread); did_change = 1; } @@ -6581,7 +6577,6 @@ void md_write_start(mddev_t *mddev, struct bio *bi) if (did_change) sysfs_notify_dirent_safe(mddev->sysfs_state); wait_event(mddev->sb_wait, - !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && !test_bit(MD_CHANGE_PENDING, &mddev->flags)); } @@ -6617,6 +6612,7 @@ int md_allow_write(mddev_t *mddev) if (mddev->in_sync) { mddev->in_sync = 0; set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_PENDING, &mddev->flags); if (mddev->safemode_delay && mddev->safemode == 0) mddev->safemode = 1; @@ -6626,7 +6622,7 @@ int md_allow_write(mddev_t *mddev) } else spin_unlock_irq(&mddev->write_lock); - if (test_bit(MD_CHANGE_CLEAN, &mddev->flags)) + if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) return -EAGAIN; else return 0; @@ -6824,8 +6820,7 @@ void md_do_sync(mddev_t *mddev) atomic_read(&mddev->recovery_active) == 0); mddev->curr_resync_completed = mddev->curr_resync; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_CLEAN, &mddev->flags); sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } @@ -7104,8 +7099,7 @@ void md_check_recovery(mddev_t *mddev) mddev->recovery_cp == MaxSector) { mddev->in_sync = 1; did_change = 1; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_CLEAN, &mddev->flags); } if (mddev->safemode == 1) mddev->safemode = 0; -- cgit v1.2.2 From 126925c090155f13e90b9e7e8c4010e96027c00a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 7 Sep 2010 17:02:47 +1000 Subject: md: call md_update_sb even for 'external' metadata arrays. Now that we depend on md_update_sb to clear variable bits in mddev->flags (rather than trying not to set them) it is important to always call md_update_sb when appropriate. md_check_recovery has this job but explicitly avoids it for ->external metadata arrays. This is not longer appropraite, or needed. However we do want to avoid taking the mddev lock if only MD_CHANGE_PENDING is set as that is not cleared by md_update_sb for external-metadata arrays. Reported-by: "Kwolek, Adam" Signed-off-by: NeilBrown --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 43cf9cc9c1df..bdd9bba577b0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7069,7 +7069,7 @@ void md_check_recovery(mddev_t *mddev) if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return; if ( ! ( - (mddev->flags && !mddev->external) || + (mddev->flags & ~ (1<recovery) || test_bit(MD_RECOVERY_DONE, &mddev->recovery) || (mddev->external == 0 && mddev->safemode == 1) || -- cgit v1.2.2 From ddcf3522cf03a147c867a2e0155761652dbd156a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 8 Sep 2010 16:48:17 +1000 Subject: md: fix v1.x metadata update when a disk is missing. If an array with 1.x metadata is assembled with the last disk missing, md doesn't properly record the fact that the disk was missing. This is unlikely to cause a real problem as the event count will be different to the count on the missing disk so it won't be included in the array. However it could still cause confusion. So make sure we clear all the relevant slots, not just the early ones. Signed-off-by: NeilBrown --- drivers/md/md.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index bdd9bba577b0..f20d13e717d5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1643,7 +1643,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; if (rdev->sb_size & bmask) rdev->sb_size = (rdev->sb_size | bmask) + 1; - } + } else + max_dev = le32_to_cpu(sb->max_dev); + for (i=0; idev_roles[i] = cpu_to_le16(0xfffe); -- cgit v1.2.2