aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bart Van Assche <bvanassche@acm.org> 2014-07-09 09:57:26 -0400
committer Roland Dreier <roland@purestorage.com> 2014-08-01 18:21:51 -0400
commit bcc05910359183b431da92713e98eed478edf83a (patch)
tree cc0fe7dada1793a8bafc4c87b8d0f888e1c1dd0a
parent cd53eb686d2418eda938aad3c9da42b7dfa9351f (diff)
IB/srp: Fix deadlock between host removal and multipathd
If scsi_remove_host() is invoked after a SCSI device has been blocked,
if the fast_io_fail_tmo or dev_loss_tmo work gets scheduled on the
workqueue executing srp_remove_work() and if an I/O request is
scheduled after the SCSI device had been blocked by e.g. multipathd
then the following deadlock can occur:

kworker/6:1 D ffff880831f3c460 0 195 2 0x00000000
Call Trace:
 [<ffffffff814aafd9>] schedule+0x29/0x70
 [<ffffffff814aa0ef>] schedule_timeout+0x10f/0x2a0
 [<ffffffff8105af6f>] msleep+0x2f/0x40
 [<ffffffff8123b0ae>] __blk_drain_queue+0x4e/0x180
 [<ffffffff8123d2d5>] blk_cleanup_queue+0x225/0x230
 [<ffffffffa0010732>] __scsi_remove_device+0x62/0xe0 [scsi_mod]
 [<ffffffffa000ed2f>] scsi_forget_host+0x6f/0x80 [scsi_mod]
 [<ffffffffa0002eba>] scsi_remove_host+0x7a/0x130 [scsi_mod]
 [<ffffffffa07cf5c5>] srp_remove_work+0x95/0x180 [ib_srp]
 [<ffffffff8106d7aa>] process_one_work+0x1ea/0x6c0
 [<ffffffff8106dd9b>] worker_thread+0x11b/0x3a0
 [<ffffffff810758bd>] kthread+0xed/0x110
 [<ffffffff814b972c>] ret_from_fork+0x7c/0xb0

multipathd D ffff880096acc460 0 5340 1 0x00000000
Call Trace:
 [<ffffffff814aafd9>] schedule+0x29/0x70
 [<ffffffff814aa0ef>] schedule_timeout+0x10f/0x2a0
 [<ffffffff814ab79b>] io_schedule_timeout+0x9b/0xf0
 [<ffffffff814abe1c>] wait_for_completion_io_timeout+0xdc/0x110
 [<ffffffff81244b9b>] blk_execute_rq+0x9b/0x100
 [<ffffffff8124f665>] sg_io+0x1a5/0x450
 [<ffffffff8124fd21>] scsi_cmd_ioctl+0x2a1/0x430
 [<ffffffff8124fef2>] scsi_cmd_blk_ioctl+0x42/0x50
 [<ffffffffa00ec97e>] sd_ioctl+0xbe/0x140 [sd_mod]
 [<ffffffff8124bd04>] blkdev_ioctl+0x234/0x840
 [<ffffffff811cb491>] block_ioctl+0x41/0x50
 [<ffffffff811a0df0>] do_vfs_ioctl+0x300/0x520
 [<ffffffff811a1051>] SyS_ioctl+0x41/0x80
 [<ffffffff814b9962>] tracesys+0xd0/0xd5

Fix this by scheduling removal work on another workqueue than the
transport layer timers.
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: David Dillow <dave@thedillows.org>
Cc: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Roland Dreier <roland@purestorage.com>
-rw-r--r-- drivers/infiniband/ulp/srp/ib_srp.c 38
1 files changed, 28 insertions, 10 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index e3c2c5b4297f..767000811cf9 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -130,6 +130,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr);
130static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); 130static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
131 131
132static struct scsi_transport_template *ib_srp_transport_template; 132static struct scsi_transport_template *ib_srp_transport_template;
133static struct workqueue_struct *srp_remove_wq;
133 134
134static struct ib_client srp_client = { 135static struct ib_client srp_client = {
135 .name = "srp", 136 .name = "srp",
@@ -731,7 +732,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target)
731 spin_unlock_irq(&target->lock); 732 spin_unlock_irq(&target->lock);
732 733
733 if (changed) 734 if (changed)
734 queue_work(system_long_wq, &target->remove_work); 735 queue_work(srp_remove_wq, &target->remove_work);
735 736
736 return changed; 737 return changed;
737} 738}
@@ -3261,9 +3262,10 @@ static void srp_remove_one(struct ib_device *device)
3261 spin_unlock(&host->target_lock); 3262 spin_unlock(&host->target_lock);
3262 3263
3263 /* 3264 /*
3264 * Wait for target port removal tasks. 3265 * Wait for tl_err and target port removal tasks.
3265 */ 3266 */
3266 flush_workqueue(system_long_wq); 3267 flush_workqueue(system_long_wq);
3268 flush_workqueue(srp_remove_wq);
3267 3269
3268 kfree(host); 3270 kfree(host);
3269 } 3271 }
@@ -3313,16 +3315,22 @@ static int __init srp_init_module(void)
3313 indirect_sg_entries = cmd_sg_entries; 3315 indirect_sg_entries = cmd_sg_entries;
3314 } 3316 }
3315 3317
3318 srp_remove_wq = create_workqueue("srp_remove");
3319 if (IS_ERR(srp_remove_wq)) {
3320 ret = PTR_ERR(srp_remove_wq);
3321 goto out;
3322 }
3323
3324 ret = -ENOMEM;
3316 ib_srp_transport_template = 3325 ib_srp_transport_template =
3317 srp_attach_transport(&ib_srp_transport_functions); 3326 srp_attach_transport(&ib_srp_transport_functions);
3318 if (!ib_srp_transport_template) 3327 if (!ib_srp_transport_template)
3319 return -ENOMEM; 3328 goto destroy_wq;
3320 3329
3321 ret = class_register(&srp_class); 3330 ret = class_register(&srp_class);
3322 if (ret) { 3331 if (ret) {
3323 pr_err("couldn't register class infiniband_srp\n"); 3332 pr_err("couldn't register class infiniband_srp\n");
3324 srp_release_transport(ib_srp_transport_template); 3333 goto release_tr;
3325 return ret;
3326 } 3334 }
3327 3335
3328 ib_sa_register_client(&srp_sa_client); 3336 ib_sa_register_client(&srp_sa_client);
@@ -3330,13 +3338,22 @@ static int __init srp_init_module(void)
3330 ret = ib_register_client(&srp_client); 3338 ret = ib_register_client(&srp_client);
3331 if (ret) { 3339 if (ret) {
3332 pr_err("couldn't register IB client\n"); 3340 pr_err("couldn't register IB client\n");
3333 srp_release_transport(ib_srp_transport_template); 3341 goto unreg_sa;
3334 ib_sa_unregister_client(&srp_sa_client);
3335 class_unregister(&srp_class);
3336 return ret;
3337 } 3342 }
3338 3343
3339 return 0; 3344out:
3345 return ret;
3346
3347unreg_sa:
3348 ib_sa_unregister_client(&srp_sa_client);
3349 class_unregister(&srp_class);
3350
3351release_tr:
3352 srp_release_transport(ib_srp_transport_template);
3353
3354destroy_wq:
3355 destroy_workqueue(srp_remove_wq);
3356 goto out;
3340} 3357}
3341 3358
3342static void __exit srp_cleanup_module(void) 3359static void __exit srp_cleanup_module(void)
@@ -3345,6 +3362,7 @@ static void __exit srp_cleanup_module(void)
3345 ib_sa_unregister_client(&srp_sa_client); 3362 ib_sa_unregister_client(&srp_sa_client);
3346 class_unregister(&srp_class); 3363 class_unregister(&srp_class);
3347 srp_release_transport(ib_srp_transport_template); 3364 srp_release_transport(ib_srp_transport_template);
3365 destroy_workqueue(srp_remove_wq);
3348} 3366}
3349 3367
3350module_init(srp_init_module); 3368module_init(srp_init_module);