diff options
author | Jun'ichi Nomura <j-nomura@ce.jp.nec.com> | 2007-01-26 03:57:07 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-01-26 16:51:00 -0500 |
commit | bfa152fa5e4d328fe3ebf15908ee8ec20a0ce6dc (patch) | |
tree | 1695481328f884639d6237eebfa48f4c29f9ce7c | |
parent | e540eb45a5254873245fd377f2fe3afc47bd33c1 (diff) |
[PATCH] dm-multipath: fix stall on noflush suspend/resume
Allow noflush suspend/resume of device-mapper device only for the case
where the device size is unchanged.
Otherwise, dm-multipath devices can stall when resumed if noflush was used
when suspending them, all paths have failed and queue_if_no_path is set.
Explanation:
1. Something is doing fsync() on the block dev,
holding inode->i_sem
2. The fsync write is blocked by all-paths-down and queue_if_no_path
3. Someone requests to suspend the dm device with noflush.
Pending writes are left in queue.
4. In the middle of dm_resume(), __bind() tries to get
inode->i_sem to do __set_size() and waits forever.
'noflush suspend' is a new device-mapper feature introduced in
early 2.6.20, so I hope this fix can be included before 2.6.20 is
released.
Example of reproducer:
1. Create a multipath device with dmsetup
2. Fail all paths during mkfs
3. Do dmsetup suspend --noflush and load new map with healthy paths
4. Do dmsetup resume
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/md/dm.c | 27 |
1 files changed, 19 insertions, 8 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fe7c56e10435..3668b170ea68 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -1116,7 +1116,8 @@ static int __bind(struct mapped_device *md, struct dm_table *t) | |||
1116 | if (size != get_capacity(md->disk)) | 1116 | if (size != get_capacity(md->disk)) |
1117 | memset(&md->geometry, 0, sizeof(md->geometry)); | 1117 | memset(&md->geometry, 0, sizeof(md->geometry)); |
1118 | 1118 | ||
1119 | __set_size(md, size); | 1119 | if (md->suspended_bdev) |
1120 | __set_size(md, size); | ||
1120 | if (size == 0) | 1121 | if (size == 0) |
1121 | return 0; | 1122 | return 0; |
1122 | 1123 | ||
@@ -1264,6 +1265,11 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table) | |||
1264 | if (!dm_suspended(md)) | 1265 | if (!dm_suspended(md)) |
1265 | goto out; | 1266 | goto out; |
1266 | 1267 | ||
1268 | /* without bdev, the device size cannot be changed */ | ||
1269 | if (!md->suspended_bdev) | ||
1270 | if (get_capacity(md->disk) != dm_table_get_size(table)) | ||
1271 | goto out; | ||
1272 | |||
1267 | __unbind(md); | 1273 | __unbind(md); |
1268 | r = __bind(md, table); | 1274 | r = __bind(md, table); |
1269 | 1275 | ||
@@ -1341,11 +1347,14 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1341 | /* This does not get reverted if there's an error later. */ | 1347 | /* This does not get reverted if there's an error later. */ |
1342 | dm_table_presuspend_targets(map); | 1348 | dm_table_presuspend_targets(map); |
1343 | 1349 | ||
1344 | md->suspended_bdev = bdget_disk(md->disk, 0); | 1350 | /* bdget() can stall if the pending I/Os are not flushed */ |
1345 | if (!md->suspended_bdev) { | 1351 | if (!noflush) { |
1346 | DMWARN("bdget failed in dm_suspend"); | 1352 | md->suspended_bdev = bdget_disk(md->disk, 0); |
1347 | r = -ENOMEM; | 1353 | if (!md->suspended_bdev) { |
1348 | goto flush_and_out; | 1354 | DMWARN("bdget failed in dm_suspend"); |
1355 | r = -ENOMEM; | ||
1356 | goto flush_and_out; | ||
1357 | } | ||
1349 | } | 1358 | } |
1350 | 1359 | ||
1351 | /* | 1360 | /* |
@@ -1473,8 +1482,10 @@ int dm_resume(struct mapped_device *md) | |||
1473 | 1482 | ||
1474 | unlock_fs(md); | 1483 | unlock_fs(md); |
1475 | 1484 | ||
1476 | bdput(md->suspended_bdev); | 1485 | if (md->suspended_bdev) { |
1477 | md->suspended_bdev = NULL; | 1486 | bdput(md->suspended_bdev); |
1487 | md->suspended_bdev = NULL; | ||
1488 | } | ||
1478 | 1489 | ||
1479 | clear_bit(DMF_SUSPENDED, &md->flags); | 1490 | clear_bit(DMF_SUSPENDED, &md->flags); |
1480 | 1491 | ||