diff options
author | Bart Van Assche <bvanassche@acm.org> | 2014-07-09 09:57:26 -0400 |
---|---|---|
committer | Roland Dreier <roland@purestorage.com> | 2014-08-01 18:21:51 -0400 |
commit | bcc05910359183b431da92713e98eed478edf83a (patch) | |
tree | cc0fe7dada1793a8bafc4c87b8d0f888e1c1dd0a | |
parent | cd53eb686d2418eda938aad3c9da42b7dfa9351f (diff) |
IB/srp: Fix deadlock between host removal and multipathd
The following deadlock can occur if all of these conditions hold:
scsi_remove_host() is invoked after a SCSI device has been blocked,
the fast_io_fail_tmo or dev_loss_tmo work gets scheduled on the
workqueue that is executing srp_remove_work(), and an I/O request is
scheduled by e.g. multipathd after the SCSI device has been blocked:
kworker/6:1 D ffff880831f3c460 0 195 2 0x00000000
Call Trace:
[<ffffffff814aafd9>] schedule+0x29/0x70
[<ffffffff814aa0ef>] schedule_timeout+0x10f/0x2a0
[<ffffffff8105af6f>] msleep+0x2f/0x40
[<ffffffff8123b0ae>] __blk_drain_queue+0x4e/0x180
[<ffffffff8123d2d5>] blk_cleanup_queue+0x225/0x230
[<ffffffffa0010732>] __scsi_remove_device+0x62/0xe0 [scsi_mod]
[<ffffffffa000ed2f>] scsi_forget_host+0x6f/0x80 [scsi_mod]
[<ffffffffa0002eba>] scsi_remove_host+0x7a/0x130 [scsi_mod]
[<ffffffffa07cf5c5>] srp_remove_work+0x95/0x180 [ib_srp]
[<ffffffff8106d7aa>] process_one_work+0x1ea/0x6c0
[<ffffffff8106dd9b>] worker_thread+0x11b/0x3a0
[<ffffffff810758bd>] kthread+0xed/0x110
[<ffffffff814b972c>] ret_from_fork+0x7c/0xb0
multipathd D ffff880096acc460 0 5340 1 0x00000000
Call Trace:
[<ffffffff814aafd9>] schedule+0x29/0x70
[<ffffffff814aa0ef>] schedule_timeout+0x10f/0x2a0
[<ffffffff814ab79b>] io_schedule_timeout+0x9b/0xf0
[<ffffffff814abe1c>] wait_for_completion_io_timeout+0xdc/0x110
[<ffffffff81244b9b>] blk_execute_rq+0x9b/0x100
[<ffffffff8124f665>] sg_io+0x1a5/0x450
[<ffffffff8124fd21>] scsi_cmd_ioctl+0x2a1/0x430
[<ffffffff8124fef2>] scsi_cmd_blk_ioctl+0x42/0x50
[<ffffffffa00ec97e>] sd_ioctl+0xbe/0x140 [sd_mod]
[<ffffffff8124bd04>] blkdev_ioctl+0x234/0x840
[<ffffffff811cb491>] block_ioctl+0x41/0x50
[<ffffffff811a0df0>] do_vfs_ioctl+0x300/0x520
[<ffffffff811a1051>] SyS_ioctl+0x41/0x80
[<ffffffff814b9962>] tracesys+0xd0/0xd5
Fix this by scheduling the removal work on a different workqueue than
the one used by the transport layer timers.
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: David Dillow <dave@thedillows.org>
Cc: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Roland Dreier <roland@purestorage.com>
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.c | 38 |
1 files changed, 28 insertions, 10 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index e3c2c5b4297f..767000811cf9 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c | |||
@@ -130,6 +130,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr); | |||
130 | static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); | 130 | static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); |
131 | 131 | ||
132 | static struct scsi_transport_template *ib_srp_transport_template; | 132 | static struct scsi_transport_template *ib_srp_transport_template; |
133 | static struct workqueue_struct *srp_remove_wq; | ||
133 | 134 | ||
134 | static struct ib_client srp_client = { | 135 | static struct ib_client srp_client = { |
135 | .name = "srp", | 136 | .name = "srp", |
@@ -731,7 +732,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target) | |||
731 | spin_unlock_irq(&target->lock); | 732 | spin_unlock_irq(&target->lock); |
732 | 733 | ||
733 | if (changed) | 734 | if (changed) |
734 | queue_work(system_long_wq, &target->remove_work); | 735 | queue_work(srp_remove_wq, &target->remove_work); |
735 | 736 | ||
736 | return changed; | 737 | return changed; |
737 | } | 738 | } |
@@ -3261,9 +3262,10 @@ static void srp_remove_one(struct ib_device *device) | |||
3261 | spin_unlock(&host->target_lock); | 3262 | spin_unlock(&host->target_lock); |
3262 | 3263 | ||
3263 | /* | 3264 | /* |
3264 | * Wait for target port removal tasks. | 3265 | * Wait for tl_err and target port removal tasks. |
3265 | */ | 3266 | */ |
3266 | flush_workqueue(system_long_wq); | 3267 | flush_workqueue(system_long_wq); |
3268 | flush_workqueue(srp_remove_wq); | ||
3267 | 3269 | ||
3268 | kfree(host); | 3270 | kfree(host); |
3269 | } | 3271 | } |
@@ -3313,16 +3315,22 @@ static int __init srp_init_module(void) | |||
3313 | indirect_sg_entries = cmd_sg_entries; | 3315 | indirect_sg_entries = cmd_sg_entries; |
3314 | } | 3316 | } |
3315 | 3317 | ||
3318 | srp_remove_wq = create_workqueue("srp_remove"); | ||
3319 | if (IS_ERR(srp_remove_wq)) { | ||
3320 | ret = PTR_ERR(srp_remove_wq); | ||
3321 | goto out; | ||
3322 | } | ||
3323 | |||
3324 | ret = -ENOMEM; | ||
3316 | ib_srp_transport_template = | 3325 | ib_srp_transport_template = |
3317 | srp_attach_transport(&ib_srp_transport_functions); | 3326 | srp_attach_transport(&ib_srp_transport_functions); |
3318 | if (!ib_srp_transport_template) | 3327 | if (!ib_srp_transport_template) |
3319 | return -ENOMEM; | 3328 | goto destroy_wq; |
3320 | 3329 | ||
3321 | ret = class_register(&srp_class); | 3330 | ret = class_register(&srp_class); |
3322 | if (ret) { | 3331 | if (ret) { |
3323 | pr_err("couldn't register class infiniband_srp\n"); | 3332 | pr_err("couldn't register class infiniband_srp\n"); |
3324 | srp_release_transport(ib_srp_transport_template); | 3333 | goto release_tr; |
3325 | return ret; | ||
3326 | } | 3334 | } |
3327 | 3335 | ||
3328 | ib_sa_register_client(&srp_sa_client); | 3336 | ib_sa_register_client(&srp_sa_client); |
@@ -3330,13 +3338,22 @@ static int __init srp_init_module(void) | |||
3330 | ret = ib_register_client(&srp_client); | 3338 | ret = ib_register_client(&srp_client); |
3331 | if (ret) { | 3339 | if (ret) { |
3332 | pr_err("couldn't register IB client\n"); | 3340 | pr_err("couldn't register IB client\n"); |
3333 | srp_release_transport(ib_srp_transport_template); | 3341 | goto unreg_sa; |
3334 | ib_sa_unregister_client(&srp_sa_client); | ||
3335 | class_unregister(&srp_class); | ||
3336 | return ret; | ||
3337 | } | 3342 | } |
3338 | 3343 | ||
3339 | return 0; | 3344 | out: |
3345 | return ret; | ||
3346 | |||
3347 | unreg_sa: | ||
3348 | ib_sa_unregister_client(&srp_sa_client); | ||
3349 | class_unregister(&srp_class); | ||
3350 | |||
3351 | release_tr: | ||
3352 | srp_release_transport(ib_srp_transport_template); | ||
3353 | |||
3354 | destroy_wq: | ||
3355 | destroy_workqueue(srp_remove_wq); | ||
3356 | goto out; | ||
3340 | } | 3357 | } |
3341 | 3358 | ||
3342 | static void __exit srp_cleanup_module(void) | 3359 | static void __exit srp_cleanup_module(void) |
@@ -3345,6 +3362,7 @@ static void __exit srp_cleanup_module(void) | |||
3345 | ib_sa_unregister_client(&srp_sa_client); | 3362 | ib_sa_unregister_client(&srp_sa_client); |
3346 | class_unregister(&srp_class); | 3363 | class_unregister(&srp_class); |
3347 | srp_release_transport(ib_srp_transport_template); | 3364 | srp_release_transport(ib_srp_transport_template); |
3365 | destroy_workqueue(srp_remove_wq); | ||
3348 | } | 3366 | } |
3349 | 3367 | ||
3350 | module_init(srp_init_module); | 3368 | module_init(srp_init_module); |