aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteffen Maier <maier@linux.vnet.ibm.com>2016-12-09 11:16:33 -0500
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2017-01-09 02:32:21 -0500
commit2a940b853ef679cd981e38f23c756d4335a3375c (patch)
treed9e9c822a9a1ebd11627f838f2545a9f4185e6d6
parent744807cb251f331ca0720b8c81a54e9db7b13ac5 (diff)
scsi: zfcp: fix rport unblock race with LUN recovery
commit 6f2ce1c6af37191640ee3ff6e8fc39ea10352f4c upstream. It is unavoidable that zfcp_scsi_queuecommand() has to finish requests with DID_IMM_RETRY (like fc_remote_port_chkready()) during the time window when zfcp detected an unavailable rport but fc_remote_port_delete(), which is asynchronous via zfcp_scsi_schedule_rport_block(), has not yet blocked the rport. However, for the case when the rport becomes available again, we should prevent unblocking the rport too early. In contrast to other FCP LLDDs, zfcp has to open each LUN with the FCP channel hardware before it can send I/O to a LUN. So if a port already has LUNs attached and we unblock the rport just after port recovery, recoveries of LUNs behind this port can still be pending which in turn force zfcp_scsi_queuecommand() to unnecessarily finish requests with DID_IMM_RETRY. This also opens a time window with unblocked rport (until the followup LUN reopen recovery has finished). If a scsi_cmnd timeout occurs during this time window fc_timed_out() cannot work as desired and such command would indeed time out and trigger scsi_eh. This prevents a clean and timely path failover. This should not happen if the path issue can be recovered on FC transport layer such as path issues involving RSCNs. Fix this by only calling zfcp_scsi_schedule_rport_register(), to asynchronously trigger fc_remote_port_add(), after all LUN recoveries as children of the rport have finished and no new recoveries of equal or higher order were triggered meanwhile. Finished intentionally includes any recovery result no matter if successful or failed (still unblock rport so other successful LUNs work). For simplicity, we check after each finished LUN recovery if there is another LUN recovery pending on the same port and then do nothing. We handle the special case of a successful recovery of a port without LUN children the same way without changing this case's semantics. For debugging we introduce 2 new trace records written if the rport unblock attempt was aborted due to still unfinished or freshly triggered recovery. The records are only written above the default trace level. Benjamin noticed the important special case of new recovery that can be triggered between having given up the erp_lock and before calling zfcp_erp_action_cleanup() within zfcp_erp_strategy(). We must avoid the following sequence: ERP thread rport_work other context ------------------------- -------------- -------------------------------- port is unblocked, rport still blocked, due to pending/running ERP action, so ((port->status & ...UNBLOCK) != 0) and (port->rport == NULL) unlock ERP zfcp_erp_action_cleanup() case ZFCP_ERP_ACTION_REOPEN_LUN: zfcp_erp_try_rport_unblock() ((status & ...UNBLOCK) != 0) [OLD!] zfcp_erp_port_reopen() lock ERP zfcp_erp_port_block() port->status clear ...UNBLOCK unlock ERP zfcp_scsi_schedule_rport_block() port->rport_task = RPORT_DEL queue_work(rport_work) zfcp_scsi_rport_work() (port->rport_task != RPORT_ADD) port->rport_task = RPORT_NONE zfcp_scsi_rport_block() if (!port->rport) return zfcp_scsi_schedule_rport_register() port->rport_task = RPORT_ADD queue_work(rport_work) zfcp_scsi_rport_work() (port->rport_task == RPORT_ADD) port->rport_task = RPORT_NONE zfcp_scsi_rport_register() (port->rport == NULL) rport = fc_remote_port_add() port->rport = rport; Now the rport was erroneously unblocked while the zfcp_port is blocked. This is another situation we want to avoid due to scsi_eh potential. This state would at least remain until the new recovery from the other context finished successfully, or potentially forever if it failed. In order to close this race, we take the erp_lock inside zfcp_erp_try_rport_unblock() when checking the status of zfcp_port or LUN. With that, the possible corresponding rport state sequences would be: (unblock[ERP thread],block[other context]) if the ERP thread gets erp_lock first and still sees ((port->status & ...UNBLOCK) != 0), (block[other context],NOP[ERP thread]) if the ERP thread gets erp_lock after the other context has already cleard ...UNBLOCK from port->status. Since checking fields of struct erp_action is unsafe because they could have been overwritten (re-used for new recovery) meanwhile, we only check status of zfcp_port and LUN since these are only changed under erp_lock elsewhere. Regarding the check of the proper status flags (port or port_forced are similar to the shown adapter recovery): [zfcp_erp_adapter_shutdown()] zfcp_erp_adapter_reopen() zfcp_erp_adapter_block() * clear UNBLOCK ---------------------------------------+ zfcp_scsi_schedule_rports_block() | write_lock_irqsave(&adapter->erp_lock, flags);-------+ | zfcp_erp_action_enqueue() | | zfcp_erp_setup_act() | | * set ERP_INUSE -----------------------------------|--|--+ write_unlock_irqrestore(&adapter->erp_lock, flags);--+ | | .context-switch. | | zfcp_erp_thread() | | zfcp_erp_strategy() | | write_lock_irqsave(&adapter->erp_lock, flags);------+ | | ... | | | zfcp_erp_strategy_check_target() | | | zfcp_erp_strategy_check_adapter() | | | zfcp_erp_adapter_unblock() | | | * set UNBLOCK -----------------------------------|--+ | zfcp_erp_action_dequeue() | | * clear ERP_INUSE ---------------------------------|-----+ ... | write_unlock_irqrestore(&adapter->erp_lock, flags);-+ Hence, we should check for both UNBLOCK and ERP_INUSE because they are interleaved. Also we need to explicitly check ERP_FAILED for the link down case which currently does not clear the UNBLOCK flag in zfcp_fsf_link_down_info_eval(). Signed-off-by: Steffen Maier <maier@linux.vnet.ibm.com> Fixes: 8830271c4819 ("[SCSI] zfcp: Dont fail SCSI commands when transitioning to blocked fc_rport") Fixes: a2fa0aede07c ("[SCSI] zfcp: Block FC transport rports early on errors") Fixes: 5f852be9e11d ("[SCSI] zfcp: Fix deadlock between zfcp ERP and SCSI") Fixes: 338151e06608 ("[SCSI] zfcp: make use of fc_remote_port_delete when target port is unavailable") Fixes: 3859f6a248cb ("[PATCH] zfcp: add rports to enable scsi_add_device to work again") Reviewed-by: Benjamin Block <bblock@linux.vnet.ibm.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--drivers/s390/scsi/zfcp_dbf.c17
-rw-r--r--drivers/s390/scsi/zfcp_erp.c61
-rw-r--r--drivers/s390/scsi/zfcp_ext.h4
-rw-r--r--drivers/s390/scsi/zfcp_scsi.c4
4 files changed, 77 insertions, 9 deletions
diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
index 581001989937..d5bf36ec8a75 100644
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -289,11 +289,12 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter,
289 289
290 290
291/** 291/**
292 * zfcp_dbf_rec_run - trace event related to running recovery 292 * zfcp_dbf_rec_run_lvl - trace event related to running recovery
293 * @level: trace level to be used for event
293 * @tag: identifier for event 294 * @tag: identifier for event
294 * @erp: erp_action running 295 * @erp: erp_action running
295 */ 296 */
296void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) 297void zfcp_dbf_rec_run_lvl(int level, char *tag, struct zfcp_erp_action *erp)
297{ 298{
298 struct zfcp_dbf *dbf = erp->adapter->dbf; 299 struct zfcp_dbf *dbf = erp->adapter->dbf;
299 struct zfcp_dbf_rec *rec = &dbf->rec_buf; 300 struct zfcp_dbf_rec *rec = &dbf->rec_buf;
@@ -319,11 +320,21 @@ void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp)
319 else 320 else
320 rec->u.run.rec_count = atomic_read(&erp->adapter->erp_counter); 321 rec->u.run.rec_count = atomic_read(&erp->adapter->erp_counter);
321 322
322 debug_event(dbf->rec, 1, rec, sizeof(*rec)); 323 debug_event(dbf->rec, level, rec, sizeof(*rec));
323 spin_unlock_irqrestore(&dbf->rec_lock, flags); 324 spin_unlock_irqrestore(&dbf->rec_lock, flags);
324} 325}
325 326
326/** 327/**
328 * zfcp_dbf_rec_run - trace event related to running recovery
329 * @tag: identifier for event
330 * @erp: erp_action running
331 */
332void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp)
333{
334 zfcp_dbf_rec_run_lvl(1, tag, erp);
335}
336
337/**
327 * zfcp_dbf_rec_run_wka - trace wka port event with info like running recovery 338 * zfcp_dbf_rec_run_wka - trace wka port event with info like running recovery
328 * @tag: identifier for event 339 * @tag: identifier for event
329 * @wka_port: well known address port 340 * @wka_port: well known address port
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index a59d678125bd..7ccfce559034 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Error Recovery Procedures (ERP). 4 * Error Recovery Procedures (ERP).
5 * 5 *
6 * Copyright IBM Corp. 2002, 2015 6 * Copyright IBM Corp. 2002, 2016
7 */ 7 */
8 8
9#define KMSG_COMPONENT "zfcp" 9#define KMSG_COMPONENT "zfcp"
@@ -1204,6 +1204,62 @@ static void zfcp_erp_action_dequeue(struct zfcp_erp_action *erp_action)
1204 } 1204 }
1205} 1205}
1206 1206
1207/**
1208 * zfcp_erp_try_rport_unblock - unblock rport if no more/new recovery
1209 * @port: zfcp_port whose fc_rport we should try to unblock
1210 */
1211static void zfcp_erp_try_rport_unblock(struct zfcp_port *port)
1212{
1213 unsigned long flags;
1214 struct zfcp_adapter *adapter = port->adapter;
1215 int port_status;
1216 struct Scsi_Host *shost = adapter->scsi_host;
1217 struct scsi_device *sdev;
1218
1219 write_lock_irqsave(&adapter->erp_lock, flags);
1220 port_status = atomic_read(&port->status);
1221 if ((port_status & ZFCP_STATUS_COMMON_UNBLOCKED) == 0 ||
1222 (port_status & (ZFCP_STATUS_COMMON_ERP_INUSE |
1223 ZFCP_STATUS_COMMON_ERP_FAILED)) != 0) {
1224 /* new ERP of severity >= port triggered elsewhere meanwhile or
1225 * local link down (adapter erp_failed but not clear unblock)
1226 */
1227 zfcp_dbf_rec_run_lvl(4, "ertru_p", &port->erp_action);
1228 write_unlock_irqrestore(&adapter->erp_lock, flags);
1229 return;
1230 }
1231 spin_lock(shost->host_lock);
1232 __shost_for_each_device(sdev, shost) {
1233 struct zfcp_scsi_dev *zsdev = sdev_to_zfcp(sdev);
1234 int lun_status;
1235
1236 if (zsdev->port != port)
1237 continue;
1238 /* LUN under port of interest */
1239 lun_status = atomic_read(&zsdev->status);
1240 if ((lun_status & ZFCP_STATUS_COMMON_ERP_FAILED) != 0)
1241 continue; /* unblock rport despite failed LUNs */
1242 /* LUN recovery not given up yet [maybe follow-up pending] */
1243 if ((lun_status & ZFCP_STATUS_COMMON_UNBLOCKED) == 0 ||
1244 (lun_status & ZFCP_STATUS_COMMON_ERP_INUSE) != 0) {
1245 /* LUN blocked:
1246 * not yet unblocked [LUN recovery pending]
1247 * or meanwhile blocked [new LUN recovery triggered]
1248 */
1249 zfcp_dbf_rec_run_lvl(4, "ertru_l", &zsdev->erp_action);
1250 spin_unlock(shost->host_lock);
1251 write_unlock_irqrestore(&adapter->erp_lock, flags);
1252 return;
1253 }
1254 }
1255 /* now port has no child or all children have completed recovery,
1256 * and no ERP of severity >= port was meanwhile triggered elsewhere
1257 */
1258 zfcp_scsi_schedule_rport_register(port);
1259 spin_unlock(shost->host_lock);
1260 write_unlock_irqrestore(&adapter->erp_lock, flags);
1261}
1262
1207static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) 1263static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result)
1208{ 1264{
1209 struct zfcp_adapter *adapter = act->adapter; 1265 struct zfcp_adapter *adapter = act->adapter;
@@ -1214,6 +1270,7 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result)
1214 case ZFCP_ERP_ACTION_REOPEN_LUN: 1270 case ZFCP_ERP_ACTION_REOPEN_LUN:
1215 if (!(act->status & ZFCP_STATUS_ERP_NO_REF)) 1271 if (!(act->status & ZFCP_STATUS_ERP_NO_REF))
1216 scsi_device_put(sdev); 1272 scsi_device_put(sdev);
1273 zfcp_erp_try_rport_unblock(port);
1217 break; 1274 break;
1218 1275
1219 case ZFCP_ERP_ACTION_REOPEN_PORT: 1276 case ZFCP_ERP_ACTION_REOPEN_PORT:
@@ -1224,7 +1281,7 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result)
1224 */ 1281 */
1225 if (act->step != ZFCP_ERP_STEP_UNINITIALIZED) 1282 if (act->step != ZFCP_ERP_STEP_UNINITIALIZED)
1226 if (result == ZFCP_ERP_SUCCEEDED) 1283 if (result == ZFCP_ERP_SUCCEEDED)
1227 zfcp_scsi_schedule_rport_register(port); 1284 zfcp_erp_try_rport_unblock(port);
1228 /* fall through */ 1285 /* fall through */
1229 case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: 1286 case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
1230 put_device(&port->dev); 1287 put_device(&port->dev);
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index c8fed9fa1cca..21c8c689b02b 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -3,7 +3,7 @@
3 * 3 *
4 * External function declarations. 4 * External function declarations.
5 * 5 *
6 * Copyright IBM Corp. 2002, 2015 6 * Copyright IBM Corp. 2002, 2016
7 */ 7 */
8 8
9#ifndef ZFCP_EXT_H 9#ifndef ZFCP_EXT_H
@@ -35,6 +35,8 @@ extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *);
35extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *, 35extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *,
36 struct zfcp_port *, struct scsi_device *, u8, u8); 36 struct zfcp_port *, struct scsi_device *, u8, u8);
37extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *); 37extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *);
38extern void zfcp_dbf_rec_run_lvl(int level, char *tag,
39 struct zfcp_erp_action *erp);
38extern void zfcp_dbf_rec_run_wka(char *, struct zfcp_fc_wka_port *, u64); 40extern void zfcp_dbf_rec_run_wka(char *, struct zfcp_fc_wka_port *, u64);
39extern void zfcp_dbf_hba_fsf_uss(char *, struct zfcp_fsf_req *); 41extern void zfcp_dbf_hba_fsf_uss(char *, struct zfcp_fsf_req *);
40extern void zfcp_dbf_hba_fsf_res(char *, int, struct zfcp_fsf_req *); 42extern void zfcp_dbf_hba_fsf_res(char *, int, struct zfcp_fsf_req *);
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 11cd18c134c1..07ffdbb5107f 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -88,9 +88,7 @@ int zfcp_scsi_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scpnt)
88 } 88 }
89 89
90 if (unlikely(!(status & ZFCP_STATUS_COMMON_UNBLOCKED))) { 90 if (unlikely(!(status & ZFCP_STATUS_COMMON_UNBLOCKED))) {
91 /* This could be either 91 /* This could be
92 * open LUN pending: this is temporary, will result in
93 * open LUN or ERP_FAILED, so retry command
94 * call to rport_delete pending: mimic retry from 92 * call to rport_delete pending: mimic retry from
95 * fc_remote_port_chkready until rport is BLOCKED 93 * fc_remote_port_chkready until rport is BLOCKED
96 */ 94 */