aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/ieee1394/sbp2.c
diff options
context:
space:
mode:
author Stefan Richter <stefanr@s5r6.in-berlin.de> 2006-08-14 12:43:00 -0400
committer Stefan Richter <stefanr@s5r6.in-berlin.de> 2006-09-17 13:34:14 -0400
commit 09ee67abe997ee95cd3f6cc552fa9532bc722d83 (patch)
tree 389f4726e5b4b3be1e4e4ed201a42ae33e67242d /drivers/ieee1394/sbp2.c
parent 2a874182842c6a70f245b7f1ad859f9152517951 (diff)
ieee1394: sbp2: handle "sbp2util_node_write_no_wait failed"
Fix for http://bugzilla.kernel.org/show_bug.cgi?id=6948. Because sbp2 writes to the target's fetch agent registers from within atomic context, it cannot sleep while waiting for a free transaction label and is therefore not guaranteed to get one. This may repeatedly lead to "sbp2util_node_write_no_wait failed" and consequently to SCSI command abortion after timeout. A likely cause is that many queue_command softirqs may occur before khpsbpkt (the ieee1394 driver's thread which cleans up after finished transactions) is woken up to recycle tlabels. Sbp2 now schedules a workqueue job whenever sbp2_link_orb_command fails in sbp2util_node_write_no_wait. The job will reliably get a transaction label because it can sleep. We use the kernel-wide shared workqueue because it is unlikely that the job itself actually needs to sleep. In the improbable case that it has to sleep, it doesn't need to sleep long since the standard transaction timeout is 100ms. Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Diffstat (limited to 'drivers/ieee1394/sbp2.c')
-rw-r--r-- drivers/ieee1394/sbp2.c 71
1 files changed, 66 insertions, 5 deletions
diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c
index 668b4512deff..c4d30a7b8b1e 100644
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -465,6 +465,44 @@ static int sbp2util_node_write_no_wait(struct node_entry *ne, u64 addr,
465 return 0; 465 return 0;
466} 466}
467 467
468static void sbp2util_notify_fetch_agent(struct scsi_id_instance_data *scsi_id,
469 u64 offset, quadlet_t *data, size_t len)
470{
471 /*
472 * There is a small window after a bus reset within which the node
473 * entry's generation is current but the reconnect wasn't completed.
474 */
475 if (atomic_read(&scsi_id->unfinished_reset))
476 return;
477
478 if (hpsb_node_write(scsi_id->ne,
479 scsi_id->sbp2_command_block_agent_addr + offset,
480 data, len))
481 SBP2_ERR("sbp2util_notify_fetch_agent failed.");
482 /*
483 * Now accept new SCSI commands, unless a bus reset happened during
484 * hpsb_node_write.
485 */
486 if (!atomic_read(&scsi_id->unfinished_reset))
487 scsi_unblock_requests(scsi_id->scsi_host);
488}
489
490static void sbp2util_write_orb_pointer(void *p)
491{
492 quadlet_t data[2];
493
494 data[0] = ORB_SET_NODE_ID(
495 ((struct scsi_id_instance_data *)p)->hi->host->node_id);
496 data[1] = ((struct scsi_id_instance_data *)p)->last_orb_dma;
497 sbp2util_cpu_to_be32_buffer(data, 8);
498 sbp2util_notify_fetch_agent(p, SBP2_ORB_POINTER_OFFSET, data, 8);
499}
500
501static void sbp2util_write_doorbell(void *p)
502{
503 sbp2util_notify_fetch_agent(p, SBP2_DOORBELL_OFFSET, NULL, 4);
504}
505
468/* 506/*
469 * This function is called to create a pool of command orbs used for 507 * This function is called to create a pool of command orbs used for
470 * command processing. It is called when a new sbp2 device is detected. 508 * command processing. It is called when a new sbp2 device is detected.
@@ -712,6 +750,7 @@ static int sbp2_remove(struct device *dev)
712 sbp2scsi_complete_all_commands(scsi_id, DID_NO_CONNECT); 750 sbp2scsi_complete_all_commands(scsi_id, DID_NO_CONNECT);
713 /* scsi_remove_device() will trigger shutdown functions of SCSI 751 /* scsi_remove_device() will trigger shutdown functions of SCSI
714 * highlevel drivers which would deadlock if blocked. */ 752 * highlevel drivers which would deadlock if blocked. */
753 atomic_set(&scsi_id->unfinished_reset, 0);
715 scsi_unblock_requests(scsi_id->scsi_host); 754 scsi_unblock_requests(scsi_id->scsi_host);
716 } 755 }
717 sdev = scsi_id->sdev; 756 sdev = scsi_id->sdev;
@@ -765,6 +804,7 @@ static int sbp2_update(struct unit_directory *ud)
765 804
766 /* Make sure we unblock requests (since this is likely after a bus 805 /* Make sure we unblock requests (since this is likely after a bus
767 * reset). */ 806 * reset). */
807 atomic_set(&scsi_id->unfinished_reset, 0);
768 scsi_unblock_requests(scsi_id->scsi_host); 808 scsi_unblock_requests(scsi_id->scsi_host);
769 809
770 return 0; 810 return 0;
@@ -795,6 +835,8 @@ static struct scsi_id_instance_data *sbp2_alloc_device(struct unit_directory *ud
795 INIT_LIST_HEAD(&scsi_id->sbp2_command_orb_completed); 835 INIT_LIST_HEAD(&scsi_id->sbp2_command_orb_completed);
796 INIT_LIST_HEAD(&scsi_id->scsi_list); 836 INIT_LIST_HEAD(&scsi_id->scsi_list);
797 spin_lock_init(&scsi_id->sbp2_command_orb_lock); 837 spin_lock_init(&scsi_id->sbp2_command_orb_lock);
838 atomic_set(&scsi_id->unfinished_reset, 0);
839 INIT_WORK(&scsi_id->protocol_work, NULL, NULL);
798 840
799 ud->device.driver_data = scsi_id; 841 ud->device.driver_data = scsi_id;
800 842
@@ -879,8 +921,10 @@ static void sbp2_host_reset(struct hpsb_host *host)
879 hi = hpsb_get_hostinfo(&sbp2_highlevel, host); 921 hi = hpsb_get_hostinfo(&sbp2_highlevel, host);
880 922
881 if (hi) { 923 if (hi) {
882 list_for_each_entry(scsi_id, &hi->scsi_ids, scsi_list) 924 list_for_each_entry(scsi_id, &hi->scsi_ids, scsi_list) {
925 atomic_set(&scsi_id->unfinished_reset, 1);
883 scsi_block_requests(scsi_id->scsi_host); 926 scsi_block_requests(scsi_id->scsi_host);
927 }
884 } 928 }
885} 929}
886 930
@@ -1032,7 +1076,7 @@ static void sbp2_remove_device(struct scsi_id_instance_data *scsi_id)
1032 scsi_remove_host(scsi_id->scsi_host); 1076 scsi_remove_host(scsi_id->scsi_host);
1033 scsi_host_put(scsi_id->scsi_host); 1077 scsi_host_put(scsi_id->scsi_host);
1034 } 1078 }
1035 1079 flush_scheduled_work();
1036 sbp2util_remove_command_orb_pool(scsi_id); 1080 sbp2util_remove_command_orb_pool(scsi_id);
1037 1081
1038 list_del(&scsi_id->scsi_list); 1082 list_del(&scsi_id->scsi_list);
@@ -1661,6 +1705,10 @@ static int sbp2_agent_reset(struct scsi_id_instance_data *scsi_id, int wait)
1661 1705
1662 SBP2_DEBUG_ENTER(); 1706 SBP2_DEBUG_ENTER();
1663 1707
1708 cancel_delayed_work(&scsi_id->protocol_work);
1709 if (wait)
1710 flush_scheduled_work();
1711
1664 data = ntohl(SBP2_AGENT_RESET_DATA); 1712 data = ntohl(SBP2_AGENT_RESET_DATA);
1665 addr = scsi_id->sbp2_command_block_agent_addr + SBP2_AGENT_RESET_OFFSET; 1713 addr = scsi_id->sbp2_command_block_agent_addr + SBP2_AGENT_RESET_OFFSET;
1666 1714
@@ -1982,9 +2030,22 @@ static void sbp2_link_orb_command(struct scsi_id_instance_data *scsi_id,
1982 2030
1983 SBP2_ORB_DEBUG("write to %s register, command orb %p", 2031 SBP2_ORB_DEBUG("write to %s register, command orb %p",
1984 last_orb ? "DOORBELL" : "ORB_POINTER", command_orb); 2032 last_orb ? "DOORBELL" : "ORB_POINTER", command_orb);
1985 if (sbp2util_node_write_no_wait(scsi_id->ne, addr, data, length)) 2033 if (sbp2util_node_write_no_wait(scsi_id->ne, addr, data, length)) {
1986 SBP2_ERR("sbp2util_node_write_no_wait failed.\n"); 2034 /*
1987 /* We rely on SCSI EH to deal with _node_write_ failures. */ 2035 * sbp2util_node_write_no_wait failed. We certainly ran out
2036 * of transaction labels, perhaps just because there were no
2037 * context switches which gave khpsbpkt a chance to collect
2038 * free tlabels. Try again in non-atomic context. If necessary,
2039 * the workqueue job will sleep so that it reliably gets a tlabel.
2040 * We do not accept new commands until the job is over.
2041 */
2042 scsi_block_requests(scsi_id->scsi_host);
2043 PREPARE_WORK(&scsi_id->protocol_work,
2044 last_orb ? sbp2util_write_doorbell:
2045 sbp2util_write_orb_pointer,
2046 scsi_id);
2047 schedule_work(&scsi_id->protocol_work);
2048 }
1988} 2049}
1989 2050
1990/* 2051/*