author     NeilBrown <neilb@suse.de>  2013-11-13 23:16:17 -0500
committer  NeilBrown <neilb@suse.de>  2013-11-18 23:19:17 -0500
commit     30b8feb730f9b9b3c5de02580897da03f59b6b16 (patch)
tree       7e902a3be606c7f46b16c62b0621dc58580fbbd9 /drivers/md
parent     c91abf5a3546a4ff0838d2905f4d7eae2795f724 (diff)
md/raid5: avoid deadlock when raid5 array has unack badblocks during md_stop_writes.
When raid5 recovery hits a fresh badblock, the badblock is flagged as
unacknowledged until md_update_sb() is called.
But md_stop takes the reconfig lock, which means raid5d can't call
md_update_sb() from md_check_recovery(); the badblock therefore stays
unacknowledged, so the raid5d thread spins in an infinite loop and
md_stop_writes() can never stop the sync_thread. This is a deadlock.
To solve this, when the STOP_ARRAY ioctl is issued and a sync_thread is
running, set the MD_RECOVERY_FROZEN and MD_RECOVERY_INTR flags in
mddev->recovery and wait for the sync_thread to stop before we (re)take
the reconfig lock.
This requires that raid5 reshape_request notices MD_RECOVERY_INTR
(which it probably should have noticed anyway) and stops waiting for a
metadata update in that case.
Reported-by: Jianpeng Ma <majianpeng@gmail.com>
Reported-by: Bian Yu <bianyu@kedacom.com>
Signed-off-by: NeilBrown <neilb@suse.de>
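
The cycle is easier to see in isolation. Below is a small, standalone
pthreads model of the pre-fix ordering; it is deliberately a hang-by-design
demo, and every name in it (reconfig_lock, sync_thread, sync_done) is
invented for illustration, standing in for the reconfig mutex and the raid5
sync thread rather than being kernel code. Build with cc -pthread.

/* Model of the deadlock: the stopper holds the "reconfig" lock while
 * waiting for the syncer, but the syncer needs that same lock before
 * it can finish.  Illustrative only; not kernel code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t reconfig_lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic int sync_done = 0;

static void *sync_thread(void *arg)
{
	(void)arg;
	/* raid5d side: "acknowledging the badblock" (md_update_sb in the
	 * real code) needs the reconfig lock, already held by the stopper. */
	pthread_mutex_lock(&reconfig_lock);	/* blocks forever */
	sync_done = 1;
	pthread_mutex_unlock(&reconfig_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_mutex_lock(&reconfig_lock);	/* "md_stop takes reconfig lock" */
	pthread_create(&t, NULL, sync_thread, NULL);

	/* Wait for the syncer while still holding the lock it needs:
	 * this loop never exits. */
	while (!sync_done)
		sleep(1);

	pthread_mutex_unlock(&reconfig_lock);
	pthread_join(t, NULL);
	puts("not reached");
	return 0;
}

The patch below breaks the cycle by doing the wait first, with the lock
dropped: it freezes and interrupts the sync_thread, releases the reconfig
lock, waits on resync_wait for the thread to exit, and only then relocks.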
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/md.c  68
1 file changed, 49 insertions, 19 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a74045df7bab..47e7bc74ed38 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5340,20 +5340,35 @@ EXPORT_SYMBOL_GPL(md_stop);
 static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 {
 	int err = 0;
+	int did_freeze = 0;
+
+	if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+		did_freeze = 1;
+		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+		md_wakeup_thread(mddev->thread);
+	}
+	if (mddev->sync_thread) {
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+		/* Thread might be blocked waiting for metadata update
+		 * which will now never happen */
+		wake_up_process(mddev->sync_thread->tsk);
+	}
+	mddev_unlock(mddev);
+	wait_event(resync_wait, mddev->sync_thread == NULL);
+	mddev_lock_nointr(mddev);
+
 	mutex_lock(&mddev->open_mutex);
-	if (atomic_read(&mddev->openers) > !!bdev) {
+	if (atomic_read(&mddev->openers) > !!bdev ||
+	    mddev->sync_thread ||
+	    (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
 		printk("md: %s still in use.\n",mdname(mddev));
+		if (did_freeze) {
+			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+			md_wakeup_thread(mddev->thread);
+		}
 		err = -EBUSY;
 		goto out;
 	}
-	if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
-		/* Someone opened the device since we flushed it
-		 * so page cache could be dirty and it is too late
-		 * to flush.  So abort
-		 */
-		mutex_unlock(&mddev->open_mutex);
-		return -EBUSY;
-	}
 	if (mddev->pers) {
 		__md_stop_writes(mddev);
 
@@ -5364,7 +5379,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 		set_disk_ro(mddev->gendisk, 1);
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
-		err = 0; 
+		err = 0;
 	}
 out:
 	mutex_unlock(&mddev->open_mutex);
@@ -5380,20 +5395,34 @@ static int do_md_stop(struct mddev * mddev, int mode,
 {
 	struct gendisk *disk = mddev->gendisk;
 	struct md_rdev *rdev;
+	int did_freeze = 0;
+
+	if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+		did_freeze = 1;
+		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+		md_wakeup_thread(mddev->thread);
+	}
+	if (mddev->sync_thread) {
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+		/* Thread might be blocked waiting for metadata update
+		 * which will now never happen */
+		wake_up_process(mddev->sync_thread->tsk);
+	}
+	mddev_unlock(mddev);
+	wait_event(resync_wait, mddev->sync_thread == NULL);
+	mddev_lock_nointr(mddev);
 
 	mutex_lock(&mddev->open_mutex);
 	if (atomic_read(&mddev->openers) > !!bdev ||
-	    mddev->sysfs_active) {
+	    mddev->sysfs_active ||
+	    mddev->sync_thread ||
+	    (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
 		printk("md: %s still in use.\n",mdname(mddev));
 		mutex_unlock(&mddev->open_mutex);
-		return -EBUSY;
-	}
-	if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
-		/* Someone opened the device since we flushed it
-		 * so page cache could be dirty and it is too late
-		 * to flush.  So abort
-		 */
-		mutex_unlock(&mddev->open_mutex);
+		if (did_freeze) {
+			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+			md_wakeup_thread(mddev->thread);
+		}
 		return -EBUSY;
 	}
 	if (mddev->pers) {
@@ -7931,6 +7960,7 @@ void md_reap_sync_thread(struct mddev *mddev)
 
 	/* resync has finished, collect result */
 	md_unregister_thread(&mddev->sync_thread);
+	wake_up(&resync_wait);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
 		/* success...*/
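
One note on the last paragraph of the commit message: the raid5 half of the
fix is not visible in this md.c-limited view. Its shape, as a rough sketch
rather than the verbatim raid5.c hunk, is that reshape_request()'s wait for
the superblock write must also complete, and bail out, once MD_RECOVERY_INTR
is set, since md_update_sb() will now never run:

	/* Sketch of the reshape_request() side; not the verbatim patch. */
	wait_event(mddev->sb_wait, mddev->flags == 0 ||
		   test_bit(MD_RECOVERY_INTR, &mddev->recovery));
	if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
		return 0;	/* abandon the reshape; the stop path reaps the thread */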