diff options
author | NeilBrown <neilb@suse.de> | 2008-07-21 03:05:25 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2008-07-21 03:05:25 -0400 |
commit | 4b80991c6cb9efa607bc4fd6f3ecdf5511c31bb0 (patch) | |
tree | 5e2ba7d509af245c29bdf04b00960cc367972c44 /drivers/md | |
parent | f2ea68cf42aafdd93393b6b8b20fc3c2b5f4390c (diff) |
md: Protect access to mddev->disks list using RCU
All modifications and most access to the mddev->disks list are made
under the reconfig_mutex lock. However there are three places where
the list is walked without any locking. If a reconfig happens at this
time, havoc (and oops) can ensue.
So use RCU to protect these accesses:
- wrap them in rcu_read_{,un}lock()
- use list_for_each_entry_rcu
- add to the list with list_add_rcu
- delete from the list with list_del_rcu
- delay the 'free' with call_rcu rather than schedule_work
Note that export_rdev did a list_del_init on this list. In almost all
cases the entry was not in the list anymore so it was a no-op and so
safe. It is no longer safe as after list_del_rcu we may not touch
the list_head.
An audit shows that export_rdev is called:
- after unbind_rdev_from_array, in which case the delete has
already been done,
- after bind_rdev_to_array fails, in which case the delete isn't needed.
- before the device has been put on a list at all (e.g. in
add_new_disk where reading the superblock fails).
- and in autorun devices after a failure when the device is on a
different list.
So remove the list_del_init call from export_rdev, and add it back
immediately before the called to export_rdev for that last case.
Note also that ->same_set is sometimes used for lists other than
mddev->list (e.g. candidates). In these cases rcu is not needed.
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 15 | ||||
-rw-r--r-- | drivers/md/md.c | 30 |
2 files changed, 28 insertions, 17 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index eba83e25b678..621a272a2c74 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
@@ -241,10 +241,10 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde | |||
241 | static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) | 241 | static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) |
242 | { | 242 | { |
243 | mdk_rdev_t *rdev; | 243 | mdk_rdev_t *rdev; |
244 | struct list_head *tmp; | ||
245 | mddev_t *mddev = bitmap->mddev; | 244 | mddev_t *mddev = bitmap->mddev; |
246 | 245 | ||
247 | rdev_for_each(rdev, tmp, mddev) | 246 | rcu_read_lock(); |
247 | rdev_for_each_rcu(rdev, mddev) | ||
248 | if (test_bit(In_sync, &rdev->flags) | 248 | if (test_bit(In_sync, &rdev->flags) |
249 | && !test_bit(Faulty, &rdev->flags)) { | 249 | && !test_bit(Faulty, &rdev->flags)) { |
250 | int size = PAGE_SIZE; | 250 | int size = PAGE_SIZE; |
@@ -260,11 +260,11 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) | |||
260 | + (long)(page->index * (PAGE_SIZE/512)) | 260 | + (long)(page->index * (PAGE_SIZE/512)) |
261 | + size/512 > 0) | 261 | + size/512 > 0) |
262 | /* bitmap runs in to metadata */ | 262 | /* bitmap runs in to metadata */ |
263 | return -EINVAL; | 263 | goto bad_alignment; |
264 | if (rdev->data_offset + mddev->size*2 | 264 | if (rdev->data_offset + mddev->size*2 |
265 | > rdev->sb_start + bitmap->offset) | 265 | > rdev->sb_start + bitmap->offset) |
266 | /* data runs in to bitmap */ | 266 | /* data runs in to bitmap */ |
267 | return -EINVAL; | 267 | goto bad_alignment; |
268 | } else if (rdev->sb_start < rdev->data_offset) { | 268 | } else if (rdev->sb_start < rdev->data_offset) { |
269 | /* METADATA BITMAP DATA */ | 269 | /* METADATA BITMAP DATA */ |
270 | if (rdev->sb_start | 270 | if (rdev->sb_start |
@@ -272,7 +272,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) | |||
272 | + page->index*(PAGE_SIZE/512) + size/512 | 272 | + page->index*(PAGE_SIZE/512) + size/512 |
273 | > rdev->data_offset) | 273 | > rdev->data_offset) |
274 | /* bitmap runs in to data */ | 274 | /* bitmap runs in to data */ |
275 | return -EINVAL; | 275 | goto bad_alignment; |
276 | } else { | 276 | } else { |
277 | /* DATA METADATA BITMAP - no problems */ | 277 | /* DATA METADATA BITMAP - no problems */ |
278 | } | 278 | } |
@@ -282,10 +282,15 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) | |||
282 | size, | 282 | size, |
283 | page); | 283 | page); |
284 | } | 284 | } |
285 | rcu_read_unlock(); | ||
285 | 286 | ||
286 | if (wait) | 287 | if (wait) |
287 | md_super_wait(mddev); | 288 | md_super_wait(mddev); |
288 | return 0; | 289 | return 0; |
290 | |||
291 | bad_alignment: | ||
292 | rcu_read_unlock(); | ||
293 | return -EINVAL; | ||
289 | } | 294 | } |
290 | 295 | ||
291 | static void bitmap_file_kick(struct bitmap *bitmap); | 296 | static void bitmap_file_kick(struct bitmap *bitmap); |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 450f30b6edf9..c2ff77ccec50 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -1395,15 +1395,17 @@ static struct super_type super_types[] = { | |||
1395 | 1395 | ||
1396 | static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) | 1396 | static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) |
1397 | { | 1397 | { |
1398 | struct list_head *tmp, *tmp2; | ||
1399 | mdk_rdev_t *rdev, *rdev2; | 1398 | mdk_rdev_t *rdev, *rdev2; |
1400 | 1399 | ||
1401 | rdev_for_each(rdev, tmp, mddev1) | 1400 | rcu_read_lock(); |
1402 | rdev_for_each(rdev2, tmp2, mddev2) | 1401 | rdev_for_each_rcu(rdev, mddev1) |
1402 | rdev_for_each_rcu(rdev2, mddev2) | ||
1403 | if (rdev->bdev->bd_contains == | 1403 | if (rdev->bdev->bd_contains == |
1404 | rdev2->bdev->bd_contains) | 1404 | rdev2->bdev->bd_contains) { |
1405 | rcu_read_unlock(); | ||
1405 | return 1; | 1406 | return 1; |
1406 | 1407 | } | |
1408 | rcu_read_unlock(); | ||
1407 | return 0; | 1409 | return 0; |
1408 | } | 1410 | } |
1409 | 1411 | ||
@@ -1470,7 +1472,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1470 | kobject_del(&rdev->kobj); | 1472 | kobject_del(&rdev->kobj); |
1471 | goto fail; | 1473 | goto fail; |
1472 | } | 1474 | } |
1473 | list_add(&rdev->same_set, &mddev->disks); | 1475 | list_add_rcu(&rdev->same_set, &mddev->disks); |
1474 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); | 1476 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); |
1475 | return 0; | 1477 | return 0; |
1476 | 1478 | ||
@@ -1495,14 +1497,16 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) | |||
1495 | return; | 1497 | return; |
1496 | } | 1498 | } |
1497 | bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); | 1499 | bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); |
1498 | list_del_init(&rdev->same_set); | 1500 | list_del_rcu(&rdev->same_set); |
1499 | printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); | 1501 | printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); |
1500 | rdev->mddev = NULL; | 1502 | rdev->mddev = NULL; |
1501 | sysfs_remove_link(&rdev->kobj, "block"); | 1503 | sysfs_remove_link(&rdev->kobj, "block"); |
1502 | 1504 | ||
1503 | /* We need to delay this, otherwise we can deadlock when | 1505 | /* We need to delay this, otherwise we can deadlock when |
1504 | * writing to 'remove' to "dev/state" | 1506 | * writing to 'remove' to "dev/state". We also need |
1507 | * to delay it due to rcu usage. | ||
1505 | */ | 1508 | */ |
1509 | synchronize_rcu(); | ||
1506 | INIT_WORK(&rdev->del_work, md_delayed_delete); | 1510 | INIT_WORK(&rdev->del_work, md_delayed_delete); |
1507 | kobject_get(&rdev->kobj); | 1511 | kobject_get(&rdev->kobj); |
1508 | schedule_work(&rdev->del_work); | 1512 | schedule_work(&rdev->del_work); |
@@ -1558,7 +1562,6 @@ static void export_rdev(mdk_rdev_t * rdev) | |||
1558 | if (rdev->mddev) | 1562 | if (rdev->mddev) |
1559 | MD_BUG(); | 1563 | MD_BUG(); |
1560 | free_disk_sb(rdev); | 1564 | free_disk_sb(rdev); |
1561 | list_del_init(&rdev->same_set); | ||
1562 | #ifndef MODULE | 1565 | #ifndef MODULE |
1563 | if (test_bit(AutoDetected, &rdev->flags)) | 1566 | if (test_bit(AutoDetected, &rdev->flags)) |
1564 | md_autodetect_dev(rdev->bdev->bd_dev); | 1567 | md_autodetect_dev(rdev->bdev->bd_dev); |
@@ -4062,8 +4065,10 @@ static void autorun_devices(int part) | |||
4062 | /* on success, candidates will be empty, on error | 4065 | /* on success, candidates will be empty, on error |
4063 | * it won't... | 4066 | * it won't... |
4064 | */ | 4067 | */ |
4065 | rdev_for_each_list(rdev, tmp, candidates) | 4068 | rdev_for_each_list(rdev, tmp, candidates) { |
4069 | list_del_init(&rdev->same_set); | ||
4066 | export_rdev(rdev); | 4070 | export_rdev(rdev); |
4071 | } | ||
4067 | mddev_put(mddev); | 4072 | mddev_put(mddev); |
4068 | } | 4073 | } |
4069 | printk(KERN_INFO "md: ... autorun DONE.\n"); | 4074 | printk(KERN_INFO "md: ... autorun DONE.\n"); |
@@ -5529,12 +5534,12 @@ int unregister_md_personality(struct mdk_personality *p) | |||
5529 | static int is_mddev_idle(mddev_t *mddev) | 5534 | static int is_mddev_idle(mddev_t *mddev) |
5530 | { | 5535 | { |
5531 | mdk_rdev_t * rdev; | 5536 | mdk_rdev_t * rdev; |
5532 | struct list_head *tmp; | ||
5533 | int idle; | 5537 | int idle; |
5534 | long curr_events; | 5538 | long curr_events; |
5535 | 5539 | ||
5536 | idle = 1; | 5540 | idle = 1; |
5537 | rdev_for_each(rdev, tmp, mddev) { | 5541 | rcu_read_lock(); |
5542 | rdev_for_each_rcu(rdev, mddev) { | ||
5538 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; | 5543 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; |
5539 | curr_events = disk_stat_read(disk, sectors[0]) + | 5544 | curr_events = disk_stat_read(disk, sectors[0]) + |
5540 | disk_stat_read(disk, sectors[1]) - | 5545 | disk_stat_read(disk, sectors[1]) - |
@@ -5566,6 +5571,7 @@ static int is_mddev_idle(mddev_t *mddev) | |||
5566 | idle = 0; | 5571 | idle = 0; |
5567 | } | 5572 | } |
5568 | } | 5573 | } |
5574 | rcu_read_unlock(); | ||
5569 | return idle; | 5575 | return idle; |
5570 | } | 5576 | } |
5571 | 5577 | ||