aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2010-02-09 00:34:14 -0500
committerNeilBrown <neilb@suse.de>2010-02-09 19:26:09 -0500
commitef286f6fa673cd7fb367e1b145069d8dbfcc6081 (patch)
tree957760503c1b1a417850db3204d6c010d1ce1a67
parent9eb07c259207d048e3ee8be2a77b2a4680b1edd4 (diff)
md: fix some lockdep issues between md and sysfs.
====== This fix is related to http://bugzilla.kernel.org/show_bug.cgi?id=15142 but does not address that exact issue. ====== sysfs does like attributes being removed while they are being accessed (i.e. read or written) and waits for the access to complete. As accessing some md attributes takes the same lock that is held while removing those attributes a deadlock can occur. This patch addresses 3 issues in md that could lead to this deadlock. Two relate to calling flush_scheduled_work while the lock is held. This is probably a bad idea in general and as we use schedule_work to delete various sysfs objects it is particularly bad. In one case flush_scheduled_work is called from md_alloc (called by md_probe) called from do_md_run which holds the lock. This call is only present to ensure that ->gendisk is set. However we can be sure that gendisk is always set (though possibly we couldn't when that code was originally written. This is because do_md_run is called in three different contexts: 1/ from md_ioctl. This requires that md_open has succeeded, and it fails if ->gendisk is not set. 2/ from writing a sysfs attribute. This can only happen if the mddev has been registered in sysfs which happens in md_alloc after ->gendisk has been set. 3/ from autorun_array which is only called by autorun_devices, which checks for ->gendisk to be set before calling autorun_array. So the call to md_probe in do_md_run can be removed, and the check on ->gendisk can also go. In the other case flush_scheduled_work is being called in do_md_stop, purportedly to wait for all md_delayed_delete calls (which delete the component rdevs) to complete. However there really isn't any need to wait for them - they have already been disconnected in all important ways. The third issue is that raid5->stop() removes some attribute names while the lock is held. There is already some infrastructure in place to delay attribute removal until after the lock is released (using schedule_work). So extend that infrastructure to remove the raid5_attrs_group. This does not address all lockdep issues related to the sysfs "s_active" lock. The rest can be address by splitting that lockdep context between symlinks and non-symlinks which hopefully will happen. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/md.c14
-rw-r--r--drivers/md/raid5.c3
2 files changed, 6 insertions, 11 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index dd3dfe42d5a9..a20a71e5efd3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4075,8 +4075,10 @@ static void mddev_delayed_delete(struct work_struct *ws)
4075{ 4075{
4076 mddev_t *mddev = container_of(ws, mddev_t, del_work); 4076 mddev_t *mddev = container_of(ws, mddev_t, del_work);
4077 4077
4078 if (mddev->private == &md_redundancy_group) { 4078 if (mddev->private) {
4079 sysfs_remove_group(&mddev->kobj, &md_redundancy_group); 4079 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
4080 if (mddev->private != (void*)1)
4081 sysfs_remove_group(&mddev->kobj, mddev->private);
4080 if (mddev->sysfs_action) 4082 if (mddev->sysfs_action)
4081 sysfs_put(mddev->sysfs_action); 4083 sysfs_put(mddev->sysfs_action);
4082 mddev->sysfs_action = NULL; 4084 mddev->sysfs_action = NULL;
@@ -4287,10 +4289,7 @@ static int do_md_run(mddev_t * mddev)
4287 sysfs_notify_dirent(rdev->sysfs_state); 4289 sysfs_notify_dirent(rdev->sysfs_state);
4288 } 4290 }
4289 4291
4290 md_probe(mddev->unit, NULL, NULL);
4291 disk = mddev->gendisk; 4292 disk = mddev->gendisk;
4292 if (!disk)
4293 return -ENOMEM;
4294 4293
4295 spin_lock(&pers_lock); 4294 spin_lock(&pers_lock);
4296 pers = find_pers(mddev->level, mddev->clevel); 4295 pers = find_pers(mddev->level, mddev->clevel);
@@ -4530,8 +4529,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4530 mddev->queue->unplug_fn = NULL; 4529 mddev->queue->unplug_fn = NULL;
4531 mddev->queue->backing_dev_info.congested_fn = NULL; 4530 mddev->queue->backing_dev_info.congested_fn = NULL;
4532 module_put(mddev->pers->owner); 4531 module_put(mddev->pers->owner);
4533 if (mddev->pers->sync_request) 4532 if (mddev->pers->sync_request && mddev->private == NULL)
4534 mddev->private = &md_redundancy_group; 4533 mddev->private = (void*)1;
4535 mddev->pers = NULL; 4534 mddev->pers = NULL;
4536 /* tell userspace to handle 'inactive' */ 4535 /* tell userspace to handle 'inactive' */
4537 sysfs_notify_dirent(mddev->sysfs_state); 4536 sysfs_notify_dirent(mddev->sysfs_state);
@@ -4578,9 +4577,6 @@ out:
4578 } 4577 }
4579 mddev->bitmap_info.offset = 0; 4578 mddev->bitmap_info.offset = 0;
4580 4579
4581 /* make sure all md_delayed_delete calls have finished */
4582 flush_scheduled_work();
4583
4584 export_array(mddev); 4580 export_array(mddev);
4585 4581
4586 mddev->array_sectors = 0; 4582 mddev->array_sectors = 0;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b5629c3e14fa..ceb24afdc147 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5136,9 +5136,8 @@ static int stop(mddev_t *mddev)
5136 mddev->thread = NULL; 5136 mddev->thread = NULL;
5137 mddev->queue->backing_dev_info.congested_fn = NULL; 5137 mddev->queue->backing_dev_info.congested_fn = NULL;
5138 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ 5138 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
5139 sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
5140 free_conf(conf); 5139 free_conf(conf);
5141 mddev->private = NULL; 5140 mddev->private = &raid5_attrs_group;
5142 return 0; 5141 return 0;
5143} 5142}
5144 5143