diff options
author | Robert Jennings <rcj@linux.vnet.ibm.com> | 2007-11-12 10:00:23 -0500 |
---|---|---|
committer | James Bottomley <James.Bottomley@HansenPartnership.com> | 2008-01-11 19:22:45 -0500 |
commit | 860784c8a2b077157b6a51fb8749524d0363cc49 (patch) | |
tree | 720d50af0b50e65969d313e9ae62966f5b8d02ac /drivers/scsi/ibmvscsi/ibmvscsi.c | |
parent | dc8875e1078961591af4fbf9556c44648bf28d11 (diff) |
[SCSI] ibmvscsi: requeue while CRQ closed
When a CRQ send fails with H_CLOSED, ibmvscsi_send_srp_event() should
return SCSI_MLQUEUE_HOST_BUSY until firmware alerts the client of a CRQ
transport event. The transport event will either reinitialize the CRQ and
requeue the requests, or fail and return the I/O with DID_ERROR.
To avoid failing the eh_* functions while re-attaching to the server
adapter, the handlers retry for a period of time (up to init_timeout
seconds) while ibmvscsi_send_srp_event() returns SCSI_MLQUEUE_HOST_BUSY.
In ibmvscsi_eh_abort_handler() the loop includes the search of the
event list, because the host lock is dropped while waiting to try
again after ibmvscsi_send_srp_event() fails. The event could have been
purged if a login was in progress when the function was called.
In ibmvscsi_eh_device_reset_handler() the loop includes the call to
get_event_struct() because a failing call to ibmvscsi_send_srp_event()
will have freed the event struct.
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Diffstat (limited to 'drivers/scsi/ibmvscsi/ibmvscsi.c')
-rw-r--r-- | drivers/scsi/ibmvscsi/ibmvscsi.c | 151 |
1 files changed, 96 insertions, 55 deletions
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 50120c8c164b..30819012898f 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c | |||
@@ -629,6 +629,16 @@ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct, | |||
629 | list_del(&evt_struct->list); | 629 | list_del(&evt_struct->list); |
630 | del_timer(&evt_struct->timer); | 630 | del_timer(&evt_struct->timer); |
631 | 631 | ||
632 | /* If send_crq returns H_CLOSED, return SCSI_MLQUEUE_HOST_BUSY. | ||
633 | * Firmware will send a CRQ with a transport event (0xFF) to | ||
634 | * tell this client what has happened to the transport. This | ||
635 | * will be handled in ibmvscsi_handle_crq() | ||
636 | */ | ||
637 | if (rc == H_CLOSED) { | ||
638 | dev_warn(hostdata->dev, "send warning. " | ||
639 | "Receive queue closed, will retry.\n"); | ||
640 | goto send_busy; | ||
641 | } | ||
632 | dev_err(hostdata->dev, "send error %d\n", rc); | 642 | dev_err(hostdata->dev, "send error %d\n", rc); |
633 | atomic_inc(&hostdata->request_limit); | 643 | atomic_inc(&hostdata->request_limit); |
634 | goto send_error; | 644 | goto send_error; |
@@ -976,58 +986,74 @@ static int ibmvscsi_eh_abort_handler(struct scsi_cmnd *cmd) | |||
976 | int rsp_rc; | 986 | int rsp_rc; |
977 | unsigned long flags; | 987 | unsigned long flags; |
978 | u16 lun = lun_from_dev(cmd->device); | 988 | u16 lun = lun_from_dev(cmd->device); |
989 | unsigned long wait_switch = 0; | ||
979 | 990 | ||
980 | /* First, find this command in our sent list so we can figure | 991 | /* First, find this command in our sent list so we can figure |
981 | * out the correct tag | 992 | * out the correct tag |
982 | */ | 993 | */ |
983 | spin_lock_irqsave(hostdata->host->host_lock, flags); | 994 | spin_lock_irqsave(hostdata->host->host_lock, flags); |
984 | found_evt = NULL; | 995 | wait_switch = jiffies + (init_timeout * HZ); |
985 | list_for_each_entry(tmp_evt, &hostdata->sent, list) { | 996 | do { |
986 | if (tmp_evt->cmnd == cmd) { | 997 | found_evt = NULL; |
987 | found_evt = tmp_evt; | 998 | list_for_each_entry(tmp_evt, &hostdata->sent, list) { |
988 | break; | 999 | if (tmp_evt->cmnd == cmd) { |
1000 | found_evt = tmp_evt; | ||
1001 | break; | ||
1002 | } | ||
989 | } | 1003 | } |
990 | } | ||
991 | 1004 | ||
992 | if (!found_evt) { | 1005 | if (!found_evt) { |
993 | spin_unlock_irqrestore(hostdata->host->host_lock, flags); | 1006 | spin_unlock_irqrestore(hostdata->host->host_lock, flags); |
994 | return SUCCESS; | 1007 | return SUCCESS; |
995 | } | 1008 | } |
996 | 1009 | ||
997 | evt = get_event_struct(&hostdata->pool); | 1010 | evt = get_event_struct(&hostdata->pool); |
998 | if (evt == NULL) { | 1011 | if (evt == NULL) { |
999 | spin_unlock_irqrestore(hostdata->host->host_lock, flags); | 1012 | spin_unlock_irqrestore(hostdata->host->host_lock, flags); |
1000 | sdev_printk(KERN_ERR, cmd->device, "failed to allocate abort event\n"); | 1013 | sdev_printk(KERN_ERR, cmd->device, |
1001 | return FAILED; | 1014 | "failed to allocate abort event\n"); |
1002 | } | 1015 | return FAILED; |
1016 | } | ||
1003 | 1017 | ||
1004 | init_event_struct(evt, | 1018 | init_event_struct(evt, |
1005 | sync_completion, | 1019 | sync_completion, |
1006 | VIOSRP_SRP_FORMAT, | 1020 | VIOSRP_SRP_FORMAT, |
1007 | init_timeout); | 1021 | init_timeout); |
1008 | 1022 | ||
1009 | tsk_mgmt = &evt->iu.srp.tsk_mgmt; | 1023 | tsk_mgmt = &evt->iu.srp.tsk_mgmt; |
1010 | 1024 | ||
1011 | /* Set up an abort SRP command */ | 1025 | /* Set up an abort SRP command */ |
1012 | memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt)); | 1026 | memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt)); |
1013 | tsk_mgmt->opcode = SRP_TSK_MGMT; | 1027 | tsk_mgmt->opcode = SRP_TSK_MGMT; |
1014 | tsk_mgmt->lun = ((u64) lun) << 48; | 1028 | tsk_mgmt->lun = ((u64) lun) << 48; |
1015 | tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK; | 1029 | tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK; |
1016 | tsk_mgmt->task_tag = (u64) found_evt; | 1030 | tsk_mgmt->task_tag = (u64) found_evt; |
1017 | 1031 | ||
1018 | sdev_printk(KERN_INFO, cmd->device, "aborting command. lun 0x%lx, tag 0x%lx\n", | 1032 | evt->sync_srp = &srp_rsp; |
1019 | tsk_mgmt->lun, tsk_mgmt->task_tag); | 1033 | |
1020 | 1034 | init_completion(&evt->comp); | |
1021 | evt->sync_srp = &srp_rsp; | 1035 | rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2); |
1022 | init_completion(&evt->comp); | 1036 | |
1023 | rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2); | 1037 | if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY) |
1038 | break; | ||
1039 | |||
1040 | spin_unlock_irqrestore(hostdata->host->host_lock, flags); | ||
1041 | msleep(10); | ||
1042 | spin_lock_irqsave(hostdata->host->host_lock, flags); | ||
1043 | } while (time_before(jiffies, wait_switch)); | ||
1044 | |||
1024 | spin_unlock_irqrestore(hostdata->host->host_lock, flags); | 1045 | spin_unlock_irqrestore(hostdata->host->host_lock, flags); |
1046 | |||
1025 | if (rsp_rc != 0) { | 1047 | if (rsp_rc != 0) { |
1026 | sdev_printk(KERN_ERR, cmd->device, | 1048 | sdev_printk(KERN_ERR, cmd->device, |
1027 | "failed to send abort() event. rc=%d\n", rsp_rc); | 1049 | "failed to send abort() event. rc=%d\n", rsp_rc); |
1028 | return FAILED; | 1050 | return FAILED; |
1029 | } | 1051 | } |
1030 | 1052 | ||
1053 | sdev_printk(KERN_INFO, cmd->device, | ||
1054 | "aborting command. lun 0x%lx, tag 0x%lx\n", | ||
1055 | (((u64) lun) << 48), (u64) found_evt); | ||
1056 | |||
1031 | wait_for_completion(&evt->comp); | 1057 | wait_for_completion(&evt->comp); |
1032 | 1058 | ||
1033 | /* make sure we got a good response */ | 1059 | /* make sure we got a good response */ |
@@ -1099,41 +1125,56 @@ static int ibmvscsi_eh_device_reset_handler(struct scsi_cmnd *cmd) | |||
1099 | int rsp_rc; | 1125 | int rsp_rc; |
1100 | unsigned long flags; | 1126 | unsigned long flags; |
1101 | u16 lun = lun_from_dev(cmd->device); | 1127 | u16 lun = lun_from_dev(cmd->device); |
1128 | unsigned long wait_switch = 0; | ||
1102 | 1129 | ||
1103 | spin_lock_irqsave(hostdata->host->host_lock, flags); | 1130 | spin_lock_irqsave(hostdata->host->host_lock, flags); |
1104 | evt = get_event_struct(&hostdata->pool); | 1131 | wait_switch = jiffies + (init_timeout * HZ); |
1105 | if (evt == NULL) { | 1132 | do { |
1106 | spin_unlock_irqrestore(hostdata->host->host_lock, flags); | 1133 | evt = get_event_struct(&hostdata->pool); |
1107 | sdev_printk(KERN_ERR, cmd->device, "failed to allocate reset event\n"); | 1134 | if (evt == NULL) { |
1108 | return FAILED; | 1135 | spin_unlock_irqrestore(hostdata->host->host_lock, flags); |
1109 | } | 1136 | sdev_printk(KERN_ERR, cmd->device, |
1137 | "failed to allocate reset event\n"); | ||
1138 | return FAILED; | ||
1139 | } | ||
1110 | 1140 | ||
1111 | init_event_struct(evt, | 1141 | init_event_struct(evt, |
1112 | sync_completion, | 1142 | sync_completion, |
1113 | VIOSRP_SRP_FORMAT, | 1143 | VIOSRP_SRP_FORMAT, |
1114 | init_timeout); | 1144 | init_timeout); |
1115 | 1145 | ||
1116 | tsk_mgmt = &evt->iu.srp.tsk_mgmt; | 1146 | tsk_mgmt = &evt->iu.srp.tsk_mgmt; |
1117 | 1147 | ||
1118 | /* Set up a lun reset SRP command */ | 1148 | /* Set up a lun reset SRP command */ |
1119 | memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt)); | 1149 | memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt)); |
1120 | tsk_mgmt->opcode = SRP_TSK_MGMT; | 1150 | tsk_mgmt->opcode = SRP_TSK_MGMT; |
1121 | tsk_mgmt->lun = ((u64) lun) << 48; | 1151 | tsk_mgmt->lun = ((u64) lun) << 48; |
1122 | tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET; | 1152 | tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET; |
1123 | 1153 | ||
1124 | sdev_printk(KERN_INFO, cmd->device, "resetting device. lun 0x%lx\n", | 1154 | evt->sync_srp = &srp_rsp; |
1125 | tsk_mgmt->lun); | 1155 | |
1156 | init_completion(&evt->comp); | ||
1157 | rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2); | ||
1158 | |||
1159 | if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY) | ||
1160 | break; | ||
1161 | |||
1162 | spin_unlock_irqrestore(hostdata->host->host_lock, flags); | ||
1163 | msleep(10); | ||
1164 | spin_lock_irqsave(hostdata->host->host_lock, flags); | ||
1165 | } while (time_before(jiffies, wait_switch)); | ||
1126 | 1166 | ||
1127 | evt->sync_srp = &srp_rsp; | ||
1128 | init_completion(&evt->comp); | ||
1129 | rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2); | ||
1130 | spin_unlock_irqrestore(hostdata->host->host_lock, flags); | 1167 | spin_unlock_irqrestore(hostdata->host->host_lock, flags); |
1168 | |||
1131 | if (rsp_rc != 0) { | 1169 | if (rsp_rc != 0) { |
1132 | sdev_printk(KERN_ERR, cmd->device, | 1170 | sdev_printk(KERN_ERR, cmd->device, |
1133 | "failed to send reset event. rc=%d\n", rsp_rc); | 1171 | "failed to send reset event. rc=%d\n", rsp_rc); |
1134 | return FAILED; | 1172 | return FAILED; |
1135 | } | 1173 | } |
1136 | 1174 | ||
1175 | sdev_printk(KERN_INFO, cmd->device, "resetting device. lun 0x%lx\n", | ||
1176 | (((u64) lun) << 48)); | ||
1177 | |||
1137 | wait_for_completion(&evt->comp); | 1178 | wait_for_completion(&evt->comp); |
1138 | 1179 | ||
1139 | /* make sure we got a good response */ | 1180 | /* make sure we got a good response */ |