diff options
author | Chad Dupuis <chad.dupuis@cavium.com> | 2017-11-15 10:06:06 -0500 |
---|---|---|
committer | Martin K. Petersen <martin.petersen@oracle.com> | 2017-11-15 18:44:56 -0500 |
commit | faae19be80be7c39c9ce8b04bcc9cc10da82c29e (patch) | |
tree | d7f44d02686ae3fcd740dc6d5a618efd56f4bc78 | |
parent | 6363b3f3ac5be096d08c8c504128befa0c033529 (diff) |
scsi: bnx2fc: Fix hung task messages when a cleanup response is not received during abort
If a cleanup task is not responded to while we are in bnx2fc_abts_cleanup, it
will hang the SCSI error handler since we use wait_for_completion instead of
wait_for_completion_timeout. So, use wait_for_completion_timeout so that we
don't hang the SCSI error handler thread forever.
Fixes the call trace:
[183373.131468] INFO: task scsi_eh_16:110146 blocked for more than 120 seconds.
[183373.131469] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[183373.131470] scsi_eh_16 D ffff88103f2fca14 0 110146 2 0x00000080
[183373.131472] ffff880855e77cb0 0000000000000046 ffff881050654e70 ffff880855e77fd8
[183373.131474] ffff880855e77fd8 ffff880855e77fd8 ffff881050654e70 ffff88103f2fcb48
[183373.131475] ffff88103f2fcb50 7fffffffffffffff ffff881050654e70 ffff88103f2fca14
[183373.131477] Call Trace:
[183373.131479] [<ffffffff8168b579>] schedule+0x29/0x70
[183373.131481] [<ffffffff81688fc9>] schedule_timeout+0x239/0x2d0
[183373.131486] [<ffffffff8142821e>] ? __dev_printk+0x3e/0x90
[183373.131487] [<ffffffff814282cd>] ? dev_printk+0x5d/0x80
[183373.131490] [<ffffffff8168b956>] wait_for_completion+0x116/0x170
[183373.131492] [<ffffffff810c4ec0>] ? wake_up_state+0x20/0x20
[183373.131494] [<ffffffffa048c234>] bnx2fc_abts_cleanup+0x3d/0x62 [bnx2fc]
[183373.131497] [<ffffffffa0483a80>] bnx2fc_eh_abort+0x470/0x580 [bnx2fc]
[183373.131500] [<ffffffff814570af>] scsi_error_handler+0x59f/0x8b0
[183373.131501] [<ffffffff81456b10>] ? scsi_eh_get_sense+0x250/0x250
[183373.131503] [<ffffffff810b052f>] kthread+0xcf/0xe0
[183373.131505] [<ffffffff810b0460>] ? kthread_create_on_node+0x140/0x140
[183373.131507] [<ffffffff81696418>] ret_from_fork+0x58/0x90
[183373.131509] [<ffffffff810b0460>] ? kthread_create_on_node+0x140/0x140
Signed-off-by: Chad Dupuis <chad.dupuis@cavium.com>
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Tested-by: Laurence Oberman <loberman@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
-rw-r--r-- | drivers/scsi/bnx2fc/bnx2fc_io.c | 40 |
1 files changed, 32 insertions, 8 deletions
diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 5b6153f23f01..8e2f767147cb 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c | |||
@@ -1084,24 +1084,35 @@ static int bnx2fc_abts_cleanup(struct bnx2fc_cmd *io_req) | |||
1084 | { | 1084 | { |
1085 | struct bnx2fc_rport *tgt = io_req->tgt; | 1085 | struct bnx2fc_rport *tgt = io_req->tgt; |
1086 | int rc = SUCCESS; | 1086 | int rc = SUCCESS; |
1087 | unsigned int time_left; | ||
1087 | 1088 | ||
1088 | io_req->wait_for_comp = 1; | 1089 | io_req->wait_for_comp = 1; |
1089 | bnx2fc_initiate_cleanup(io_req); | 1090 | bnx2fc_initiate_cleanup(io_req); |
1090 | 1091 | ||
1091 | spin_unlock_bh(&tgt->tgt_lock); | 1092 | spin_unlock_bh(&tgt->tgt_lock); |
1092 | 1093 | ||
1093 | wait_for_completion(&io_req->tm_done); | 1094 | /* |
1094 | 1095 | * Can't wait forever on cleanup response lest we let the SCSI error | |
1096 | * handler wait forever | ||
1097 | */ | ||
1098 | time_left = wait_for_completion_timeout(&io_req->tm_done, | ||
1099 | BNX2FC_FW_TIMEOUT); | ||
1095 | io_req->wait_for_comp = 0; | 1100 | io_req->wait_for_comp = 0; |
1101 | if (!time_left) | ||
1102 | BNX2FC_IO_DBG(io_req, "%s(): Wait for cleanup timed out.\n", | ||
1103 | __func__); | ||
1104 | |||
1096 | /* | 1105 | /* |
1097 | * release the reference taken in eh_abort to allow the | 1106 | * Release reference held by SCSI command the cleanup completion |
1098 | * target to re-login after flushing IOs | 1107 | * hits the BNX2FC_CLEANUP case in bnx2fc_process_cq_compl() and |
1108 | * thus the SCSI command is not returned by bnx2fc_scsi_done(). | ||
1099 | */ | 1109 | */ |
1100 | kref_put(&io_req->refcount, bnx2fc_cmd_release); | 1110 | kref_put(&io_req->refcount, bnx2fc_cmd_release); |
1101 | 1111 | ||
1102 | spin_lock_bh(&tgt->tgt_lock); | 1112 | spin_lock_bh(&tgt->tgt_lock); |
1103 | return rc; | 1113 | return rc; |
1104 | } | 1114 | } |
1115 | |||
1105 | /** | 1116 | /** |
1106 | * bnx2fc_eh_abort - eh_abort_handler api to abort an outstanding | 1117 | * bnx2fc_eh_abort - eh_abort_handler api to abort an outstanding |
1107 | * SCSI command | 1118 | * SCSI command |
@@ -1118,6 +1129,7 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd) | |||
1118 | struct fc_lport *lport; | 1129 | struct fc_lport *lport; |
1119 | struct bnx2fc_rport *tgt; | 1130 | struct bnx2fc_rport *tgt; |
1120 | int rc; | 1131 | int rc; |
1132 | unsigned int time_left; | ||
1121 | 1133 | ||
1122 | rc = fc_block_scsi_eh(sc_cmd); | 1134 | rc = fc_block_scsi_eh(sc_cmd); |
1123 | if (rc) | 1135 | if (rc) |
@@ -1194,6 +1206,11 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd) | |||
1194 | if (cancel_delayed_work(&io_req->timeout_work)) | 1206 | if (cancel_delayed_work(&io_req->timeout_work)) |
1195 | kref_put(&io_req->refcount, | 1207 | kref_put(&io_req->refcount, |
1196 | bnx2fc_cmd_release); /* drop timer hold */ | 1208 | bnx2fc_cmd_release); /* drop timer hold */ |
1209 | /* | ||
1210 | * We don't want to hold off the upper layer timer so simply | ||
1211 | * cleanup the command and return that I/O was successfully | ||
1212 | * aborted. | ||
1213 | */ | ||
1197 | rc = bnx2fc_abts_cleanup(io_req); | 1214 | rc = bnx2fc_abts_cleanup(io_req); |
1198 | /* This only occurs when a task abort was requested while ABTS | 1215 | /* This only occurs when a task abort was requested while ABTS |
1199 | is in progress. Setting the IO_CLEANUP flag will skip the | 1216 | is in progress. Setting the IO_CLEANUP flag will skip the |
@@ -1201,7 +1218,7 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd) | |||
1201 | was a result from the ABTS request rather than the CLEANUP | 1218 | was a result from the ABTS request rather than the CLEANUP |
1202 | request */ | 1219 | request */ |
1203 | set_bit(BNX2FC_FLAG_IO_CLEANUP, &io_req->req_flags); | 1220 | set_bit(BNX2FC_FLAG_IO_CLEANUP, &io_req->req_flags); |
1204 | goto out; | 1221 | goto done; |
1205 | } | 1222 | } |
1206 | 1223 | ||
1207 | /* Cancel the current timer running on this io_req */ | 1224 | /* Cancel the current timer running on this io_req */ |
@@ -1221,7 +1238,11 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd) | |||
1221 | } | 1238 | } |
1222 | spin_unlock_bh(&tgt->tgt_lock); | 1239 | spin_unlock_bh(&tgt->tgt_lock); |
1223 | 1240 | ||
1224 | wait_for_completion(&io_req->tm_done); | 1241 | /* Wait 2 * RA_TOV + 1 to be sure timeout function hasn't fired */ |
1242 | time_left = wait_for_completion_timeout(&io_req->tm_done, | ||
1243 | (2 * rp->r_a_tov + 1) * HZ); | ||
1244 | if (time_left) | ||
1245 | BNX2FC_IO_DBG(io_req, "Timed out in eh_abort waiting for tm_done"); | ||
1225 | 1246 | ||
1226 | spin_lock_bh(&tgt->tgt_lock); | 1247 | spin_lock_bh(&tgt->tgt_lock); |
1227 | io_req->wait_for_comp = 0; | 1248 | io_req->wait_for_comp = 0; |
@@ -1233,8 +1254,12 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd) | |||
1233 | /* Let the scsi-ml try to recover this command */ | 1254 | /* Let the scsi-ml try to recover this command */ |
1234 | printk(KERN_ERR PFX "abort failed, xid = 0x%x\n", | 1255 | printk(KERN_ERR PFX "abort failed, xid = 0x%x\n", |
1235 | io_req->xid); | 1256 | io_req->xid); |
1257 | /* | ||
1258 | * Cleanup firmware residuals before returning control back | ||
1259 | * to SCSI ML. | ||
1260 | */ | ||
1236 | rc = bnx2fc_abts_cleanup(io_req); | 1261 | rc = bnx2fc_abts_cleanup(io_req); |
1237 | goto out; | 1262 | goto done; |
1238 | } else { | 1263 | } else { |
1239 | /* | 1264 | /* |
1240 | * We come here even when there was a race condition | 1265 | * We come here even when there was a race condition |
@@ -1249,7 +1274,6 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd) | |||
1249 | done: | 1274 | done: |
1250 | /* release the reference taken in eh_abort */ | 1275 | /* release the reference taken in eh_abort */ |
1251 | kref_put(&io_req->refcount, bnx2fc_cmd_release); | 1276 | kref_put(&io_req->refcount, bnx2fc_cmd_release); |
1252 | out: | ||
1253 | spin_unlock_bh(&tgt->tgt_lock); | 1277 | spin_unlock_bh(&tgt->tgt_lock); |
1254 | return rc; | 1278 | return rc; |
1255 | } | 1279 | } |