aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2008-07-21 03:05:25 -0400
committer NeilBrown <neilb@suse.de> 2008-07-21 03:05:25 -0400
commit 4b80991c6cb9efa607bc4fd6f3ecdf5511c31bb0 (patch)
tree 5e2ba7d509af245c29bdf04b00960cc367972c44 /drivers/md/md.c
parent f2ea68cf42aafdd93393b6b8b20fc3c2b5f4390c (diff)
md: Protect access to mddev->disks list using RCU
All modifications and most accesses to the mddev->disks list are made under the reconfig_mutex lock. However, there are three places where the list is walked without any locking. If a reconfig happens at this time, havoc (and oops) can ensue.

So use RCU to protect these accesses:
 - wrap them in rcu_read_{,un}lock()
 - use list_for_each_entry_rcu
 - add to the list with list_add_rcu
 - delete from the list with list_del_rcu
 - delay the 'free' by calling synchronize_rcu() before the existing schedule_work

Note that export_rdev did a list_del_init on this list. In almost all cases the entry was not in the list anymore, so it was a no-op and so safe. It is no longer safe, as after list_del_rcu we may not touch the list_head.

An audit shows that export_rdev is called:
 - after unbind_rdev_from_array, in which case the delete has already been done,
 - after bind_rdev_to_array fails, in which case the delete isn't needed,
 - before the device has been put on a list at all (e.g. in add_new_disk where reading the superblock fails),
 - and in autorun_devices after a failure, when the device is on a different list.

So remove the list_del_init call from export_rdev, and add it back immediately before the call to export_rdev for that last case.

Note also that ->same_set is sometimes used for lists other than mddev->disks (e.g. candidates). In these cases RCU is not needed.

Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- drivers/md/md.c 30
1 files changed, 18 insertions, 12 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 450f30b6edf9..c2ff77ccec50 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1395,15 +1395,17 @@ static struct super_type super_types[] = {
1395 1395
1396static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) 1396static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
1397{ 1397{
1398 struct list_head *tmp, *tmp2;
1399 mdk_rdev_t *rdev, *rdev2; 1398 mdk_rdev_t *rdev, *rdev2;
1400 1399
1401 rdev_for_each(rdev, tmp, mddev1) 1400 rcu_read_lock();
1402 rdev_for_each(rdev2, tmp2, mddev2) 1401 rdev_for_each_rcu(rdev, mddev1)
1402 rdev_for_each_rcu(rdev2, mddev2)
1403 if (rdev->bdev->bd_contains == 1403 if (rdev->bdev->bd_contains ==
1404 rdev2->bdev->bd_contains) 1404 rdev2->bdev->bd_contains) {
1405 rcu_read_unlock();
1405 return 1; 1406 return 1;
1406 1407 }
1408 rcu_read_unlock();
1407 return 0; 1409 return 0;
1408} 1410}
1409 1411
@@ -1470,7 +1472,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1470 kobject_del(&rdev->kobj); 1472 kobject_del(&rdev->kobj);
1471 goto fail; 1473 goto fail;
1472 } 1474 }
1473 list_add(&rdev->same_set, &mddev->disks); 1475 list_add_rcu(&rdev->same_set, &mddev->disks);
1474 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); 1476 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
1475 return 0; 1477 return 0;
1476 1478
@@ -1495,14 +1497,16 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
1495 return; 1497 return;
1496 } 1498 }
1497 bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); 1499 bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
1498 list_del_init(&rdev->same_set); 1500 list_del_rcu(&rdev->same_set);
1499 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); 1501 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
1500 rdev->mddev = NULL; 1502 rdev->mddev = NULL;
1501 sysfs_remove_link(&rdev->kobj, "block"); 1503 sysfs_remove_link(&rdev->kobj, "block");
1502 1504
1503 /* We need to delay this, otherwise we can deadlock when 1505 /* We need to delay this, otherwise we can deadlock when
1504 * writing to 'remove' to "dev/state" 1506 * writing to 'remove' to "dev/state". We also need
1507 * to delay it due to rcu usage.
1505 */ 1508 */
1509 synchronize_rcu();
1506 INIT_WORK(&rdev->del_work, md_delayed_delete); 1510 INIT_WORK(&rdev->del_work, md_delayed_delete);
1507 kobject_get(&rdev->kobj); 1511 kobject_get(&rdev->kobj);
1508 schedule_work(&rdev->del_work); 1512 schedule_work(&rdev->del_work);
@@ -1558,7 +1562,6 @@ static void export_rdev(mdk_rdev_t * rdev)
1558 if (rdev->mddev) 1562 if (rdev->mddev)
1559 MD_BUG(); 1563 MD_BUG();
1560 free_disk_sb(rdev); 1564 free_disk_sb(rdev);
1561 list_del_init(&rdev->same_set);
1562#ifndef MODULE 1565#ifndef MODULE
1563 if (test_bit(AutoDetected, &rdev->flags)) 1566 if (test_bit(AutoDetected, &rdev->flags))
1564 md_autodetect_dev(rdev->bdev->bd_dev); 1567 md_autodetect_dev(rdev->bdev->bd_dev);
@@ -4062,8 +4065,10 @@ static void autorun_devices(int part)
4062 /* on success, candidates will be empty, on error 4065 /* on success, candidates will be empty, on error
4063 * it won't... 4066 * it won't...
4064 */ 4067 */
4065 rdev_for_each_list(rdev, tmp, candidates) 4068 rdev_for_each_list(rdev, tmp, candidates) {
4069 list_del_init(&rdev->same_set);
4066 export_rdev(rdev); 4070 export_rdev(rdev);
4071 }
4067 mddev_put(mddev); 4072 mddev_put(mddev);
4068 } 4073 }
4069 printk(KERN_INFO "md: ... autorun DONE.\n"); 4074 printk(KERN_INFO "md: ... autorun DONE.\n");
@@ -5529,12 +5534,12 @@ int unregister_md_personality(struct mdk_personality *p)
5529static int is_mddev_idle(mddev_t *mddev) 5534static int is_mddev_idle(mddev_t *mddev)
5530{ 5535{
5531 mdk_rdev_t * rdev; 5536 mdk_rdev_t * rdev;
5532 struct list_head *tmp;
5533 int idle; 5537 int idle;
5534 long curr_events; 5538 long curr_events;
5535 5539
5536 idle = 1; 5540 idle = 1;
5537 rdev_for_each(rdev, tmp, mddev) { 5541 rcu_read_lock();
5542 rdev_for_each_rcu(rdev, mddev) {
5538 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; 5543 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
5539 curr_events = disk_stat_read(disk, sectors[0]) + 5544 curr_events = disk_stat_read(disk, sectors[0]) +
5540 disk_stat_read(disk, sectors[1]) - 5545 disk_stat_read(disk, sectors[1]) -
@@ -5566,6 +5571,7 @@ static int is_mddev_idle(mddev_t *mddev)
5566 idle = 0; 5571 idle = 0;
5567 } 5572 }
5568 } 5573 }
5574 rcu_read_unlock();
5569 return idle; 5575 return idle;
5570} 5576}
5571 5577