aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNithin Nayak Sujir <nsujir@broadcom.com>2011-04-25 15:30:06 -0400
committerJames Bottomley <James.Bottomley@suse.de>2011-05-01 12:50:22 -0400
commit112f661d6dac9af1235d2d05299fc2c9cb876ae7 (patch)
tree73071fae0bf9c0e6e2f89a0d81d557087fcd4556
parentb413f498e12faaf5912de89e7ac7e882956e0b0a (diff)
[SCSI] scsi_transport_fc: Fix deadlock during fc_remove_host
Creating and destroying fcoe interface in a tight loop leads to a system deadlock with the following call traces: Call Trace: [<ffffffff814f4b3d>] schedule_timeout+0x1fd/0x2c0 [<ffffffff814f469f>] ? wait_for_common+0x4f/0x190 [<ffffffff814f469f>] ? wait_for_common+0x4f/0x190 [<ffffffff814f4737>] wait_for_common+0xe7/0x190 [<ffffffff81042fa0>] ? default_wake_function+0x0/0x20 [<ffffffff81082c2d>] ? trace_hardirqs_on+0xd/0x10 [<ffffffff814f48bd>] wait_for_completion+0x1d/0x20 [<ffffffff81066d90>] flush_workqueue+0x290/0x5f0 [<ffffffff81066b00>] ? flush_workqueue+0x0/0x5f0 [<ffffffff81067148>] destroy_workqueue+0x38/0x340 [<ffffffffa0260289>] fc_remove_host+0x1b9/0x1f0 [scsi_transport_fc] [<ffffffffa02ed195>] bnx2fc_if_destroy+0xc5/0x1f0 [bnx2fc] [<ffffffffa02ed33a>] bnx2fc_destroy+0x7a/0x100 [bnx2fc] [<ffffffffa02c789b>] fcoe_transport_destroy+0x9b/0x1b0 [libfcoe] [<ffffffff81069ec2>] param_attr_store+0x52/0x80 [<ffffffff81069976>] module_attr_store+0x26/0x30 [<ffffffff8119e726>] sysfs_write_file+0xe6/0x170 [<ffffffff81134710>] vfs_write+0xd0/0x1a0 [<ffffffff811348e4>] sys_write+0x54/0xa0 [<ffffffff81002e02>] system_call_fastpath+0x16/0x1b Call Trace: [<ffffffff81074865>] async_synchronize_cookie_domain+0x75/0x120 [<ffffffff8106caa0>] ? autoremove_wake_function+0x0/0x40 [<ffffffff81074925>] async_synchronize_cookie+0x15/0x20 [<ffffffff8107494c>] async_synchronize_full+0x1c/0x40 [<ffffffffa0057466>] sd_remove+0x36/0xc0 [sd_mod] [<ffffffff81358a75>] __device_release_driver+0x75/0xe0 [<ffffffff81358bef>] device_release_driver+0x2f/0x50 [<ffffffff81357aee>] bus_remove_device+0xbe/0x120 [<ffffffff813553ef>] device_del+0x12f/0x1e0 [<ffffffff8137454d>] __scsi_remove_device+0xbd/0xc0 [<ffffffff81374585>] scsi_remove_device+0x35/0x50 [<ffffffff813746a7>] __scsi_remove_target+0xe7/0x110 [<ffffffff81374730>] ? 
__remove_child+0x0/0x30 [<ffffffff81374753>] __remove_child+0x23/0x30 [<ffffffff81354a2c>] device_for_each_child+0x4c/0x80 [<ffffffff81374703>] scsi_remove_target+0x33/0x60 [<ffffffffa02622c6>] fc_starget_delete+0x26/0x30 [scsi_transport_fc] [<ffffffffa026271a>] fc_rport_final_delete+0xaa/0x200 [scsi_transport_fc] [<ffffffff8106585a>] process_one_work+0x1aa/0x540 [<ffffffff810657eb>] ? process_one_work+0x13b/0x540 [<ffffffffa0262670>] ? fc_rport_final_delete+0x0/0x200 [scsi_transport_fc] [<ffffffff81067ac9>] worker_thread+0x179/0x410 [<ffffffff81067950>] ? worker_thread+0x0/0x410 [<ffffffff8106c546>] kthread+0xb6/0xc0 [<ffffffff8103879b>] ? finish_task_switch+0x4b/0xe0 [<ffffffff81003ca4>] kernel_thread_helper+0x4/0x10 [<ffffffff814f7994>] ? restore_args+0x0/0x30 [<ffffffff8106c490>] ? kthread+0x0/0xc0 [<ffffffff81003ca0>] ? kernel_thread_helper+0x0/0x10 fc_remove_host() waits for the workqueue to be flushed, but it is stuck flushing the first work item. The first work item does not complete, because it is waiting for the async layer to complete the IOs. The async layer cannot complete the IOs because terminate_rport_io for the second work item has not been called; it will be called only when the first work item completes. Hence the deadlock. To resolve this deadlock, the workqueue allocation has been changed from create_singlethread_workqueue() to alloc_workqueue(). In addition, fc_terminate_rport_io() should be called before scsi_flush_work() to avoid a similar deadlock. scsi fc alloc queue. move terminate rport io before flush Signed-off-by: Nithin Nayak Sujir <nsujir@broadcom.com> Signed-off-by: Bhanu Prakash Gollapudi <bprakash@broadcom.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
-rw-r--r--drivers/scsi/scsi_transport_fc.c11
1 files changed, 5 insertions, 6 deletions
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index fdf3fa63905..358dff6732e 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -422,8 +422,7 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev,
422 422
423 snprintf(fc_host->work_q_name, sizeof(fc_host->work_q_name), 423 snprintf(fc_host->work_q_name, sizeof(fc_host->work_q_name),
424 "fc_wq_%d", shost->host_no); 424 "fc_wq_%d", shost->host_no);
425 fc_host->work_q = create_singlethread_workqueue( 425 fc_host->work_q = alloc_workqueue(fc_host->work_q_name, 0, 0);
426 fc_host->work_q_name);
427 if (!fc_host->work_q) 426 if (!fc_host->work_q)
428 return -ENOMEM; 427 return -ENOMEM;
429 428
@@ -431,8 +430,8 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev,
431 snprintf(fc_host->devloss_work_q_name, 430 snprintf(fc_host->devloss_work_q_name,
432 sizeof(fc_host->devloss_work_q_name), 431 sizeof(fc_host->devloss_work_q_name),
433 "fc_dl_%d", shost->host_no); 432 "fc_dl_%d", shost->host_no);
434 fc_host->devloss_work_q = create_singlethread_workqueue( 433 fc_host->devloss_work_q =
435 fc_host->devloss_work_q_name); 434 alloc_workqueue(fc_host->devloss_work_q_name, 0, 0);
436 if (!fc_host->devloss_work_q) { 435 if (!fc_host->devloss_work_q) {
437 destroy_workqueue(fc_host->work_q); 436 destroy_workqueue(fc_host->work_q);
438 fc_host->work_q = NULL; 437 fc_host->work_q = NULL;
@@ -2489,6 +2488,8 @@ fc_rport_final_delete(struct work_struct *work)
2489 unsigned long flags; 2488 unsigned long flags;
2490 int do_callback = 0; 2489 int do_callback = 0;
2491 2490
2491 fc_terminate_rport_io(rport);
2492
2492 /* 2493 /*
2493 * if a scan is pending, flush the SCSI Host work_q so that 2494 * if a scan is pending, flush the SCSI Host work_q so that
2494 * that we can reclaim the rport scan work element. 2495 * that we can reclaim the rport scan work element.
@@ -2496,8 +2497,6 @@ fc_rport_final_delete(struct work_struct *work)
2496 if (rport->flags & FC_RPORT_SCAN_PENDING) 2497 if (rport->flags & FC_RPORT_SCAN_PENDING)
2497 scsi_flush_work(shost); 2498 scsi_flush_work(shost);
2498 2499
2499 fc_terminate_rport_io(rport);
2500
2501 /* 2500 /*
2502 * Cancel any outstanding timers. These should really exist 2501 * Cancel any outstanding timers. These should really exist
2503 * only when rmmod'ing the LLDD and we're asking for 2502 * only when rmmod'ing the LLDD and we're asking for