aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2009-08-09 22:50:52 -0400
committerNeilBrown <neilb@suse.de>2009-08-09 22:50:52 -0400
commitc8c00a6915a2e3d10416e8bdd3138429beb96210 (patch)
treeecf06a9e2b08edefe707da450b52a69f818ec7d6
parent7b2aa037e878c939676675969983284a02958ae3 (diff)
Remove deadlock potential in md_open
A recent commit: commit 449aad3e25358812c43afc60918c5ad3819488e7 introduced the possibility of an A-B/B-A deadlock between bd_mutex and reconfig_mutex. __blkdev_get holds bd_mutex while calling md_open which takes reconfig_mutex, do_md_run is always called with reconfig_mutex held, and it now takes bd_mutex in the call the revalidate_disk. This potential deadlock was not caught by lockdep due to the use of mutex_lock_interruptible_nexted which was introduced by commit d63a5a74dee87883fda6b7d170244acaac5b05e8 do avoid a warning of an impossible deadlock. It is quite possible to split reconfig_mutex in to two locks. One protects the array data structures while it is being reconfigured, the other ensures that an array is never even partially open while it is being deactivated. In particular, the second lock prevents an open from completing between the time when do_md_stop checks if there are any active opens, and the time when the array is either set read-only, or when ->pers is set to NULL. So we can be certain that no IO is in flight as the array is being destroyed. So create a new lock, open_mutex, just to ensure exclusion between 'open' and 'stop'. This avoids the deadlock and also avoids the lockdep warning mentioned in commit d63a5a74d Reported-by: "Mike Snitzer" <snitzer@gmail.com> Reported-by: "H. Peter Anvin" <hpa@zytor.com> Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/md.c18
-rw-r--r--drivers/md/md.h10
2 files changed, 20 insertions, 8 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5b98bea4ff9..5614500092e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
359 else 359 else
360 new->md_minor = MINOR(unit) >> MdpMinorShift; 360 new->md_minor = MINOR(unit) >> MdpMinorShift;
361 361
362 mutex_init(&new->open_mutex);
362 mutex_init(&new->reconfig_mutex); 363 mutex_init(&new->reconfig_mutex);
363 INIT_LIST_HEAD(&new->disks); 364 INIT_LIST_HEAD(&new->disks);
364 INIT_LIST_HEAD(&new->all_mddevs); 365 INIT_LIST_HEAD(&new->all_mddevs);
@@ -4304,12 +4305,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4304 struct gendisk *disk = mddev->gendisk; 4305 struct gendisk *disk = mddev->gendisk;
4305 mdk_rdev_t *rdev; 4306 mdk_rdev_t *rdev;
4306 4307
4308 mutex_lock(&mddev->open_mutex);
4307 if (atomic_read(&mddev->openers) > is_open) { 4309 if (atomic_read(&mddev->openers) > is_open) {
4308 printk("md: %s still in use.\n",mdname(mddev)); 4310 printk("md: %s still in use.\n",mdname(mddev));
4309 return -EBUSY; 4311 err = -EBUSY;
4310 } 4312 } else if (mddev->pers) {
4311
4312 if (mddev->pers) {
4313 4313
4314 if (mddev->sync_thread) { 4314 if (mddev->sync_thread) {
4315 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4315 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4367,7 +4367,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4367 set_disk_ro(disk, 1); 4367 set_disk_ro(disk, 1);
4368 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4368 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4369 } 4369 }
4370 4370out:
4371 mutex_unlock(&mddev->open_mutex);
4372 if (err)
4373 return err;
4371 /* 4374 /*
4372 * Free resources if final stop 4375 * Free resources if final stop
4373 */ 4376 */
@@ -4433,7 +4436,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4433 blk_integrity_unregister(disk); 4436 blk_integrity_unregister(disk);
4434 md_new_event(mddev); 4437 md_new_event(mddev);
4435 sysfs_notify_dirent(mddev->sysfs_state); 4438 sysfs_notify_dirent(mddev->sysfs_state);
4436out:
4437 return err; 4439 return err;
4438} 4440}
4439 4441
@@ -5518,12 +5520,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
5518 } 5520 }
5519 BUG_ON(mddev != bdev->bd_disk->private_data); 5521 BUG_ON(mddev != bdev->bd_disk->private_data);
5520 5522
5521 if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) 5523 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
5522 goto out; 5524 goto out;
5523 5525
5524 err = 0; 5526 err = 0;
5525 atomic_inc(&mddev->openers); 5527 atomic_inc(&mddev->openers);
5526 mddev_unlock(mddev); 5528 mutex_unlock(&mddev->open_mutex);
5527 5529
5528 check_disk_change(bdev); 5530 check_disk_change(bdev);
5529 out: 5531 out:
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 78f03168baf..f8fc188bc76 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -223,6 +223,16 @@ struct mddev_s
223 * so we don't loop trying */ 223 * so we don't loop trying */
224 224
225 int in_sync; /* know to not need resync */ 225 int in_sync; /* know to not need resync */
226 /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
227 * that we are never stopping an array while it is open.
228 * 'reconfig_mutex' protects all other reconfiguration.
229 * These locks are separate due to conflicting interactions
230 * with bdev->bd_mutex.
231 * Lock ordering is:
232 * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
233 * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
234 */
235 struct mutex open_mutex;
226 struct mutex reconfig_mutex; 236 struct mutex reconfig_mutex;
227 atomic_t active; /* general refcount */ 237 atomic_t active; /* general refcount */
228 atomic_t openers; /* number of active opens */ 238 atomic_t openers; /* number of active opens */