aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthew R. Ochs <mrochs@linux.vnet.ibm.com>2015-10-21 16:15:52 -0400
committerJames Bottomley <JBottomley@Odin.com>2015-10-30 04:22:11 -0400
commitaacb4ff69eea4ac47a7389f90ea7a896abbe92f5 (patch)
tree8318e4ff5a29e309f91786c7beb386cb97524d5d
parentfa3f2c6eb1eb69a9023d648c5bafbf4f062ab84d (diff)
cxlflash: Fix to avoid potential deadlock on EEH
Ioctl threads that use scsi_execute() can run for an excessive amount of time due to the fact that they have lengthy timeouts and retry logic built in. Under normal operation this is not an issue. However, once EEH enters the picture, a long execution time coupled with the possibility that a timeout can trigger entry to the driver via registered reset callbacks becomes a liability. In particular, a deadlock can occur when an EEH event is encountered while in running in scsi_execute(). As part of the recovery, the EEH handler drains all currently running ioctls, waiting until they have completed before proceeding with a reset. As the scsi_execute()'s are situated on the ioctl path, the EEH handler will wait until they (and the remainder of the ioctl handler they're associated with) have completed. Normally this would not be much of an issue aside from the longer recovery period. Unfortunately, the scsi_execute() triggers a reset when it times out. The reset handler will see that the device is already being reset and wait until that reset completed. This creates a condition where the EEH handler becomes stuck, infinitely waiting for the ioctl thread to complete. To avoid this behavior, temporarily unmark the scsi_execute() threads as an ioctl thread by releasing the ioctl read semaphore. This allows the EEH handler to proceed with a recovery while the thread is still running. Once the scsi_execute() returns, the ioctl read semaphore is reacquired and the adapter state is rechecked in case it changed while inside of scsi_execute(). The state check will wait if the adapter is still being recovered or returns a failure if the recovery failed. In the event that the adapter reset failed, the failure is simply returned as the ioctl would be unable to continue. Reported-by: Brian King <brking@linux.vnet.ibm.com> Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com> Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com> Reviewed-by: Brian King <brking@linux.vnet.ibm.com> Reviewed-by: Daniel Axtens <dja@axtens.net> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: James Bottomley <JBottomley@Odin.com>
-rw-r--r--drivers/scsi/cxlflash/superpipe.c30
-rw-r--r--drivers/scsi/cxlflash/superpipe.h2
-rw-r--r--drivers/scsi/cxlflash/vlun.c29
3 files changed, 60 insertions, 1 deletions
diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c
index 34acb587d730..18a5e11aac3a 100644
--- a/drivers/scsi/cxlflash/superpipe.c
+++ b/drivers/scsi/cxlflash/superpipe.c
@@ -283,6 +283,24 @@ out:
283 * @sdev: SCSI device associated with LUN. 283 * @sdev: SCSI device associated with LUN.
284 * @lli: LUN destined for capacity request. 284 * @lli: LUN destined for capacity request.
285 * 285 *
286 * The READ_CAP16 can take quite a while to complete. Should an EEH occur while
287 * in scsi_execute(), the EEH handler will attempt to recover. As part of the
288 * recovery, the handler drains all currently running ioctls, waiting until they
289 * have completed before proceeding with a reset. As this routine is used on the
290 * ioctl path, this can create a condition where the EEH handler becomes stuck,
291 * infinitely waiting for this ioctl thread. To avoid this behavior, temporarily
292 * unmark this thread as an ioctl thread by releasing the ioctl read semaphore.
293 * This will allow the EEH handler to proceed with a recovery while this thread
294 * is still running. Once the scsi_execute() returns, reacquire the ioctl read
295 * semaphore and check the adapter state in case it changed while inside of
296 * scsi_execute(). The state check will wait if the adapter is still being
297 * recovered or return a failure if the recovery failed. In the event that the
298 * adapter reset failed, simply return the failure as the ioctl would be unable
299 * to continue.
300 *
301 * Note that the above puts a requirement on this routine to only be called on
302 * an ioctl thread.
303 *
286 * Return: 0 on success, -errno on failure 304 * Return: 0 on success, -errno on failure
287 */ 305 */
288static int read_cap16(struct scsi_device *sdev, struct llun_info *lli) 306static int read_cap16(struct scsi_device *sdev, struct llun_info *lli)
@@ -314,8 +332,18 @@ retry:
314 dev_dbg(dev, "%s: %ssending cmd(0x%x)\n", __func__, 332 dev_dbg(dev, "%s: %ssending cmd(0x%x)\n", __func__,
315 retry_cnt ? "re" : "", scsi_cmd[0]); 333 retry_cnt ? "re" : "", scsi_cmd[0]);
316 334
335 /* Drop the ioctl read semahpore across lengthy call */
336 up_read(&cfg->ioctl_rwsem);
317 result = scsi_execute(sdev, scsi_cmd, DMA_FROM_DEVICE, cmd_buf, 337 result = scsi_execute(sdev, scsi_cmd, DMA_FROM_DEVICE, cmd_buf,
318 CMD_BUFSIZE, sense_buf, to, CMD_RETRIES, 0, NULL); 338 CMD_BUFSIZE, sense_buf, to, CMD_RETRIES, 0, NULL);
339 down_read(&cfg->ioctl_rwsem);
340 rc = check_state(cfg);
341 if (rc) {
342 dev_err(dev, "%s: Failed state! result=0x08%X\n",
343 __func__, result);
344 rc = -ENODEV;
345 goto out;
346 }
319 347
320 if (driver_byte(result) == DRIVER_SENSE) { 348 if (driver_byte(result) == DRIVER_SENSE) {
321 result &= ~(0xFF<<24); /* DRIVER_SENSE is not an error */ 349 result &= ~(0xFF<<24); /* DRIVER_SENSE is not an error */
@@ -1221,7 +1249,7 @@ static const struct file_operations null_fops = {
1221 * 1249 *
1222 * Return: 0 on success, -errno on failure 1250 * Return: 0 on success, -errno on failure
1223 */ 1251 */
1224static int check_state(struct cxlflash_cfg *cfg) 1252int check_state(struct cxlflash_cfg *cfg)
1225{ 1253{
1226 struct device *dev = &cfg->dev->dev; 1254 struct device *dev = &cfg->dev->dev;
1227 int rc = 0; 1255 int rc = 0;
diff --git a/drivers/scsi/cxlflash/superpipe.h b/drivers/scsi/cxlflash/superpipe.h
index 7df88eeea9c0..06a805ab1439 100644
--- a/drivers/scsi/cxlflash/superpipe.h
+++ b/drivers/scsi/cxlflash/superpipe.h
@@ -147,4 +147,6 @@ void cxlflash_ba_terminate(struct ba_lun *);
147 147
148int cxlflash_manage_lun(struct scsi_device *, struct dk_cxlflash_manage_lun *); 148int cxlflash_manage_lun(struct scsi_device *, struct dk_cxlflash_manage_lun *);
149 149
150int check_state(struct cxlflash_cfg *);
151
150#endif /* ifndef _CXLFLASH_SUPERPIPE_H */ 152#endif /* ifndef _CXLFLASH_SUPERPIPE_H */
diff --git a/drivers/scsi/cxlflash/vlun.c b/drivers/scsi/cxlflash/vlun.c
index b0eaf557cc2f..a53f583e2d7b 100644
--- a/drivers/scsi/cxlflash/vlun.c
+++ b/drivers/scsi/cxlflash/vlun.c
@@ -400,6 +400,24 @@ static int init_vlun(struct llun_info *lli)
400 * @lba: Logical block address to start write same. 400 * @lba: Logical block address to start write same.
401 * @nblks: Number of logical blocks to write same. 401 * @nblks: Number of logical blocks to write same.
402 * 402 *
403 * The SCSI WRITE_SAME16 can take quite a while to complete. Should an EEH occur
404 * while in scsi_execute(), the EEH handler will attempt to recover. As part of
405 * the recovery, the handler drains all currently running ioctls, waiting until
406 * they have completed before proceeding with a reset. As this routine is used
407 * on the ioctl path, this can create a condition where the EEH handler becomes
408 * stuck, infinitely waiting for this ioctl thread. To avoid this behavior,
409 * temporarily unmark this thread as an ioctl thread by releasing the ioctl read
410 * semaphore. This will allow the EEH handler to proceed with a recovery while
411 * this thread is still running. Once the scsi_execute() returns, reacquire the
412 * ioctl read semaphore and check the adapter state in case it changed while
413 * inside of scsi_execute(). The state check will wait if the adapter is still
414 * being recovered or return a failure if the recovery failed. In the event that
415 * the adapter reset failed, simply return the failure as the ioctl would be
416 * unable to continue.
417 *
418 * Note that the above puts a requirement on this routine to only be called on
419 * an ioctl thread.
420 *
403 * Return: 0 on success, -errno on failure 421 * Return: 0 on success, -errno on failure
404 */ 422 */
405static int write_same16(struct scsi_device *sdev, 423static int write_same16(struct scsi_device *sdev,
@@ -433,9 +451,20 @@ static int write_same16(struct scsi_device *sdev,
433 put_unaligned_be32(ws_limit < left ? ws_limit : left, 451 put_unaligned_be32(ws_limit < left ? ws_limit : left,
434 &scsi_cmd[10]); 452 &scsi_cmd[10]);
435 453
454 /* Drop the ioctl read semahpore across lengthy call */
455 up_read(&cfg->ioctl_rwsem);
436 result = scsi_execute(sdev, scsi_cmd, DMA_TO_DEVICE, cmd_buf, 456 result = scsi_execute(sdev, scsi_cmd, DMA_TO_DEVICE, cmd_buf,
437 CMD_BUFSIZE, sense_buf, to, CMD_RETRIES, 457 CMD_BUFSIZE, sense_buf, to, CMD_RETRIES,
438 0, NULL); 458 0, NULL);
459 down_read(&cfg->ioctl_rwsem);
460 rc = check_state(cfg);
461 if (rc) {
462 dev_err(dev, "%s: Failed state! result=0x08%X\n",
463 __func__, result);
464 rc = -ENODEV;
465 goto out;
466 }
467
439 if (result) { 468 if (result) {
440 dev_err_ratelimited(dev, "%s: command failed for " 469 dev_err_ratelimited(dev, "%s: command failed for "
441 "offset %lld result=0x%x\n", 470 "offset %lld result=0x%x\n",