aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd/drbd_nl.c
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2010-10-16 06:13:47 -0400
committerPhilipp Reisner <philipp.reisner@linbit.com>2010-10-22 09:46:11 -0400
commit82f59cc6353889b426cf13b6596d5a3d100fa09e (patch)
tree6d5a678516334f0a37a56a509b84322a0352719b /drivers/block/drbd/drbd_nl.c
parent3beec1d446fba335f07787636920892dd3b2c658 (diff)
drbd: fix potential deadlock on detach
If we have contention in drbd_al_begin_iod (heavy randon IO), an administrative request to detach the disk may deadlock for similar reasons as the recently fixed deadlock if detaching because of IO-error. The approach taken here is to either go through the intermediate cleanup state D_FAILED, or first lock out application io, don't just go directly to D_DISKLESS. We need an additional state bit (WAS_IO_ERROR) to distinguish the -> D_FAILED because of IO-error from other failures. Sanitize D_ATTACHING -> D_FAILED to D_ATTACHING -> D_DISKLESS. If only attaching, ldev may be missing still, but would be referenced from within the after_state_ch for -> D_FAILED, potentially dereferencing a NULL pointer. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_nl.c')
-rw-r--r--drivers/block/drbd/drbd_nl.c16
1 files changed, 15 insertions, 1 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index c498c4827de4..0cba7d3d2b5d 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -870,6 +870,11 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
870 retcode = ERR_DISK_CONFIGURED; 870 retcode = ERR_DISK_CONFIGURED;
871 goto fail; 871 goto fail;
872 } 872 }
873 /* It may just now have detached because of IO error. Make sure
874 * drbd_ldev_destroy is done already, we may end up here very fast,
875 * e.g. if someone calls attach from the on-io-error handler,
876 * to realize a "hot spare" feature (not that I'd recommend that) */
877 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
873 878
874 /* allocation not in the IO path, cqueue thread context */ 879 /* allocation not in the IO path, cqueue thread context */
875 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); 880 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
@@ -1262,7 +1267,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1262 force_diskless_dec: 1267 force_diskless_dec:
1263 put_ldev(mdev); 1268 put_ldev(mdev);
1264 force_diskless: 1269 force_diskless:
1265 drbd_force_state(mdev, NS(disk, D_DISKLESS)); 1270 drbd_force_state(mdev, NS(disk, D_FAILED));
1266 drbd_md_sync(mdev); 1271 drbd_md_sync(mdev);
1267 release_bdev2_fail: 1272 release_bdev2_fail:
1268 if (nbc) 1273 if (nbc)
@@ -1285,10 +1290,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1285 return 0; 1290 return 0;
1286} 1291}
1287 1292
1293/* Detaching the disk is a process in multiple stages. First we need to lock
1294 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1295 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1296 * internal references as well.
1297 * Only then we have finally detached. */
1288static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 1298static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1289 struct drbd_nl_cfg_reply *reply) 1299 struct drbd_nl_cfg_reply *reply)
1290{ 1300{
1301 drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
1291 reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); 1302 reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
1303 if (mdev->state.disk == D_DISKLESS)
1304 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
1305 drbd_resume_io(mdev);
1292 return 0; 1306 return 0;
1293} 1307}
1294 1308