aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMikulas Patocka <mpatocka@redhat.com>2009-06-22 05:12:17 -0400
committerAlasdair G Kergon <agk@redhat.com>2009-06-22 05:12:17 -0400
commit32a926da5a16c01a8213331e5764472ce2f14a8d (patch)
treeedac1cef90e4754353a50fb948c4e34b4df8b5bc
parentdb8fef4fabe4a546ce74f80bff64fd43776e5912 (diff)
dm: always hold bdev reference
Fix a potential deadlock when creating multiple snapshots by holding a reference to struct block_device for the whole lifecycle of every dm device instead of obtaining it independently at each point it is needed. bdget_disk() was called while the device was being suspended, in dm_suspend(). However there could be other devices already suspended, for example when creating additional snapshots of a device. bdget_disk() can wait for IO and allocate memory resulting in waiting for the already-suspended device - deadlock. This patch changes the code so that it gets the reference to struct block_device when struct mapped_device is allocated and initialized in alloc_dev() where it is always OK to allocate memory or wait for I/O. It drops the reference when it is destroyed in free_dev(). Thus there is no call to bdget_disk() while any device is suspended. Previously unlock_fs() was called only if bdev was held. Now it is called unconditionally, but the superfluous calls are harmless because it returns immediately if the filesystem was not previously frozen. This patch also now allows the device size to be changed in a noflush suspend because the bdev is held. This has no adverse effect. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
-rw-r--r--drivers/md/dm.c57
1 files changed, 16 insertions, 41 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 1cfd9b72403..5e06f1e6234 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1180,6 +1180,10 @@ static struct mapped_device *alloc_dev(int minor)
1180 if (!md->wq) 1180 if (!md->wq)
1181 goto bad_thread; 1181 goto bad_thread;
1182 1182
1183 md->bdev = bdget_disk(md->disk, 0);
1184 if (!md->bdev)
1185 goto bad_bdev;
1186
1183 /* Populate the mapping, nobody knows we exist yet */ 1187 /* Populate the mapping, nobody knows we exist yet */
1184 spin_lock(&_minor_lock); 1188 spin_lock(&_minor_lock);
1185 old_md = idr_replace(&_minor_idr, md, minor); 1189 old_md = idr_replace(&_minor_idr, md, minor);
@@ -1189,6 +1193,8 @@ static struct mapped_device *alloc_dev(int minor)
1189 1193
1190 return md; 1194 return md;
1191 1195
1196bad_bdev:
1197 destroy_workqueue(md->wq);
1192bad_thread: 1198bad_thread:
1193 put_disk(md->disk); 1199 put_disk(md->disk);
1194bad_disk: 1200bad_disk:
@@ -1214,10 +1220,8 @@ static void free_dev(struct mapped_device *md)
1214{ 1220{
1215 int minor = MINOR(disk_devt(md->disk)); 1221 int minor = MINOR(disk_devt(md->disk));
1216 1222
1217 if (md->bdev) { 1223 unlock_fs(md);
1218 unlock_fs(md); 1224 bdput(md->bdev);
1219 bdput(md->bdev);
1220 }
1221 destroy_workqueue(md->wq); 1225 destroy_workqueue(md->wq);
1222 mempool_destroy(md->tio_pool); 1226 mempool_destroy(md->tio_pool);
1223 mempool_destroy(md->io_pool); 1227 mempool_destroy(md->io_pool);
@@ -1277,8 +1281,7 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
1277 if (size != get_capacity(md->disk)) 1281 if (size != get_capacity(md->disk))
1278 memset(&md->geometry, 0, sizeof(md->geometry)); 1282 memset(&md->geometry, 0, sizeof(md->geometry));
1279 1283
1280 if (md->bdev) 1284 __set_size(md, size);
1281 __set_size(md, size);
1282 1285
1283 if (!size) { 1286 if (!size) {
1284 dm_table_destroy(t); 1287 dm_table_destroy(t);
@@ -1520,11 +1523,6 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
1520 if (!dm_suspended(md)) 1523 if (!dm_suspended(md))
1521 goto out; 1524 goto out;
1522 1525
1523 /* without bdev, the device size cannot be changed */
1524 if (!md->bdev)
1525 if (get_capacity(md->disk) != dm_table_get_size(table))
1526 goto out;
1527
1528 __unbind(md); 1526 __unbind(md);
1529 r = __bind(md, table); 1527 r = __bind(md, table);
1530 1528
@@ -1552,9 +1550,6 @@ static int lock_fs(struct mapped_device *md)
1552 1550
1553 set_bit(DMF_FROZEN, &md->flags); 1551 set_bit(DMF_FROZEN, &md->flags);
1554 1552
1555 /* don't bdput right now, we don't want the bdev
1556 * to go away while it is locked.
1557 */
1558 return 0; 1553 return 0;
1559} 1554}
1560 1555
@@ -1601,24 +1596,14 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1601 /* This does not get reverted if there's an error later. */ 1596 /* This does not get reverted if there's an error later. */
1602 dm_table_presuspend_targets(map); 1597 dm_table_presuspend_targets(map);
1603 1598
1604 /* bdget() can stall if the pending I/Os are not flushed */ 1599 /*
1605 if (!noflush) { 1600 * Flush I/O to the device. noflush supersedes do_lockfs,
1606 md->bdev = bdget_disk(md->disk, 0); 1601 * because lock_fs() needs to flush I/Os.
1607 if (!md->bdev) { 1602 */
1608 DMWARN("bdget failed in dm_suspend"); 1603 if (!noflush && do_lockfs) {
1609 r = -ENOMEM; 1604 r = lock_fs(md);
1605 if (r)
1610 goto out; 1606 goto out;
1611 }
1612
1613 /*
1614 * Flush I/O to the device. noflush supersedes do_lockfs,
1615 * because lock_fs() needs to flush I/Os.
1616 */
1617 if (do_lockfs) {
1618 r = lock_fs(md);
1619 if (r)
1620 goto out;
1621 }
1622 } 1607 }
1623 1608
1624 /* 1609 /*
@@ -1675,11 +1660,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1675 set_bit(DMF_SUSPENDED, &md->flags); 1660 set_bit(DMF_SUSPENDED, &md->flags);
1676 1661
1677out: 1662out:
1678 if (r && md->bdev) {
1679 bdput(md->bdev);
1680 md->bdev = NULL;
1681 }
1682
1683 dm_table_put(map); 1663 dm_table_put(map);
1684 1664
1685out_unlock: 1665out_unlock:
@@ -1708,11 +1688,6 @@ int dm_resume(struct mapped_device *md)
1708 1688
1709 unlock_fs(md); 1689 unlock_fs(md);
1710 1690
1711 if (md->bdev) {
1712 bdput(md->bdev);
1713 md->bdev = NULL;
1714 }
1715
1716 clear_bit(DMF_SUSPENDED, &md->flags); 1691 clear_bit(DMF_SUSPENDED, &md->flags);
1717 1692
1718 dm_table_unplug_all(map); 1693 dm_table_unplug_all(map);