aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2011-05-10 03:49:01 -0400
committer: NeilBrown <neilb@suse.de> 2011-05-11 00:26:17 -0400
commit: b0140891a8cea36469f58d23859e599b1122bd37 (patch)
tree: 01f378d9964c1d24683a3c42bfd06b1da7d985b6
parent: 693d92a1bbc9e42681c42ed190bd42b636ca876f (diff)
md: Fix race when creating a new md device.
There is a race when creating an md device by opening /dev/mdXX.

If two processes do this at much the same time they will follow the call path
  __blkdev_get -> get_gendisk -> kobj_lookup

The first will call
  -> md_probe -> md_alloc -> add_disk -> blk_register_region
and the race happens when the second gets to kobj_lookup after add_disk has called blk_register_region but before it returns to md_alloc.

In that case the second will not call md_probe (as the probe is already done) but will get a handle on the gendisk, return to __blkdev_get which will then call md_open (via the ->open pointer).

As mddev->gendisk hasn't been set yet, md_open will think something is wrong and return with ERESTARTSYS.

This can loop endlessly while the first thread makes no progress through add_disk. Nothing is blocking it, but due to scheduler behaviour it doesn't get a turn. So this is essentially a live-lock.

We fix this by simply moving the assignment to mddev->gendisk before the call to add_disk() so md_open doesn't get confused. Also move blk_queue_flush earlier because add_disk should be as late as possible.

To make sure that md_open doesn't complete until md_alloc has done all that is needed, we take mddev->open_mutex during the last part of md_alloc. md_open will wait for this.

This can cause a lock-up on boot so Cc:ing for stable. For 2.6.36 and earlier a different patch will be needed as the 'blk_queue_flush' call isn't there.

Signed-off-by: NeilBrown <neilb@suse.de>
Reported-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
Tested-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
Cc: stable@kernel.org
-rw-r--r-- drivers/md/md.c | 11
1 files changed, 8 insertions, 3 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7d6f7f18a920..4a4c0f80bdeb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4347,13 +4347,19 @@ static int md_alloc(dev_t dev, char *name)
4347 disk->fops = &md_fops; 4347 disk->fops = &md_fops;
4348 disk->private_data = mddev; 4348 disk->private_data = mddev;
4349 disk->queue = mddev->queue; 4349 disk->queue = mddev->queue;
4350 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
4350 /* Allow extended partitions. This makes the 4351 /* Allow extended partitions. This makes the
4351 * 'mdp' device redundant, but we can't really 4352 * 'mdp' device redundant, but we can't really
4352 * remove it now. 4353 * remove it now.
4353 */ 4354 */
4354 disk->flags |= GENHD_FL_EXT_DEVT; 4355 disk->flags |= GENHD_FL_EXT_DEVT;
4355 add_disk(disk);
4356 mddev->gendisk = disk; 4356 mddev->gendisk = disk;
4357 /* As soon as we call add_disk(), another thread could get
4358 * through to md_open, so make sure it doesn't get too far
4359 */
4360 mutex_lock(&mddev->open_mutex);
4361 add_disk(disk);
4362
4357 error = kobject_init_and_add(&mddev->kobj, &md_ktype, 4363 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
4358 &disk_to_dev(disk)->kobj, "%s", "md"); 4364 &disk_to_dev(disk)->kobj, "%s", "md");
4359 if (error) { 4365 if (error) {
@@ -4367,8 +4373,7 @@ static int md_alloc(dev_t dev, char *name)
4367 if (mddev->kobj.sd && 4373 if (mddev->kobj.sd &&
4368 sysfs_create_group(&mddev->kobj, &md_bitmap_group)) 4374 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4369 printk(KERN_DEBUG "pointless warning\n"); 4375 printk(KERN_DEBUG "pointless warning\n");
4370 4376 mutex_unlock(&mddev->open_mutex);
4371 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
4372 abort: 4377 abort:
4373 mutex_unlock(&disks_mutex); 4378 mutex_unlock(&disks_mutex);
4374 if (!error && mddev->kobj.sd) { 4379 if (!error && mddev->kobj.sd) {