about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Brian King <brking@linux.vnet.ibm.com> 2007-01-30 18:51:17 -0500
committer: James Bottomley <jejb@mulgrave.il.steeleye.com> 2007-02-03 09:32:10 -0500
commit: 292148f8bb2b5d120440e046d24de07a739461aa (patch)
tree: b87e6805e4fd9d7c0306c3ea350049275156cb0f
parent: 214fbb75075efa677b614be79a2d62dd79785b4f (diff)
[SCSI] scsi_error: Fix lost EH commands
If an EH command times out today, the LLDD's abort handler will be called to abort the command. It is assumed that this completes successfully, which can result in the command getting completed later resulting in an oops. Improve the current implementation by escalating all the way to host reset if necessary in order to clean up the EH command.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
-rw-r--r-- drivers/scsi/scsi_error.c 239
1 files changed, 123 insertions, 116 deletions
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 2dce06a58c08..b8edcf5b5451 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -458,6 +458,128 @@ static void scsi_eh_done(struct scsi_cmnd *scmd)
458} 458}
459 459
460/** 460/**
461 * scsi_try_host_reset - ask host adapter to reset itself
462 * @scmd: SCSI cmd to send hsot reset.
463 **/
464static int scsi_try_host_reset(struct scsi_cmnd *scmd)
465{
466 unsigned long flags;
467 int rtn;
468
469 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",
470 __FUNCTION__));
471
472 if (!scmd->device->host->hostt->eh_host_reset_handler)
473 return FAILED;
474
475 rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd);
476
477 if (rtn == SUCCESS) {
478 if (!scmd->device->host->hostt->skip_settle_delay)
479 ssleep(HOST_RESET_SETTLE_TIME);
480 spin_lock_irqsave(scmd->device->host->host_lock, flags);
481 scsi_report_bus_reset(scmd->device->host,
482 scmd_channel(scmd));
483 spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
484 }
485
486 return rtn;
487}
488
489/**
490 * scsi_try_bus_reset - ask host to perform a bus reset
491 * @scmd: SCSI cmd to send bus reset.
492 **/
493static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
494{
495 unsigned long flags;
496 int rtn;
497
498 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",
499 __FUNCTION__));
500
501 if (!scmd->device->host->hostt->eh_bus_reset_handler)
502 return FAILED;
503
504 rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd);
505
506 if (rtn == SUCCESS) {
507 if (!scmd->device->host->hostt->skip_settle_delay)
508 ssleep(BUS_RESET_SETTLE_TIME);
509 spin_lock_irqsave(scmd->device->host->host_lock, flags);
510 scsi_report_bus_reset(scmd->device->host,
511 scmd_channel(scmd));
512 spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
513 }
514
515 return rtn;
516}
517
518/**
519 * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev
520 * @scmd: SCSI cmd used to send BDR
521 *
522 * Notes:
523 * There is no timeout for this operation. if this operation is
524 * unreliable for a given host, then the host itself needs to put a
525 * timer on it, and set the host back to a consistent state prior to
526 * returning.
527 **/
528static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)
529{
530 int rtn;
531
532 if (!scmd->device->host->hostt->eh_device_reset_handler)
533 return FAILED;
534
535 rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd);
536 if (rtn == SUCCESS) {
537 scmd->device->was_reset = 1;
538 scmd->device->expecting_cc_ua = 1;
539 }
540
541 return rtn;
542}
543
544static int __scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
545{
546 if (!scmd->device->host->hostt->eh_abort_handler)
547 return FAILED;
548
549 return scmd->device->host->hostt->eh_abort_handler(scmd);
550}
551
552/**
553 * scsi_try_to_abort_cmd - Ask host to abort a running command.
554 * @scmd: SCSI cmd to abort from Lower Level.
555 *
556 * Notes:
557 * This function will not return until the user's completion function
558 * has been called. there is no timeout on this operation. if the
559 * author of the low-level driver wishes this operation to be timed,
560 * they can provide this facility themselves. helper functions in
561 * scsi_error.c can be supplied to make this easier to do.
562 **/
563static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
564{
565 /*
566 * scsi_done was called just after the command timed out and before
567 * we had a chance to process it. (db)
568 */
569 if (scmd->serial_number == 0)
570 return SUCCESS;
571 return __scsi_try_to_abort_cmd(scmd);
572}
573
574static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd)
575{
576 if (__scsi_try_to_abort_cmd(scmd) != SUCCESS)
577 if (scsi_try_bus_device_reset(scmd) != SUCCESS)
578 if (scsi_try_bus_reset(scmd) != SUCCESS)
579 scsi_try_host_reset(scmd);
580}
581
582/**
461 * scsi_send_eh_cmnd - submit a scsi command as part of error recory 583 * scsi_send_eh_cmnd - submit a scsi command as part of error recory
462 * @scmd: SCSI command structure to hijack 584 * @scmd: SCSI command structure to hijack
463 * @cmnd: CDB to send 585 * @cmnd: CDB to send
@@ -584,13 +706,7 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
584 break; 706 break;
585 } 707 }
586 } else { 708 } else {
587 /* 709 scsi_abort_eh_cmnd(scmd);
588 * FIXME(eric) - we are not tracking whether we could
589 * abort a timed out command or not. not sure how
590 * we should treat them differently anyways.
591 */
592 if (shost->hostt->eh_abort_handler)
593 shost->hostt->eh_abort_handler(scmd);
594 rtn = FAILED; 710 rtn = FAILED;
595 } 711 }
596 712
@@ -723,31 +839,6 @@ int scsi_eh_get_sense(struct list_head *work_q,
723EXPORT_SYMBOL_GPL(scsi_eh_get_sense); 839EXPORT_SYMBOL_GPL(scsi_eh_get_sense);
724 840
725/** 841/**
726 * scsi_try_to_abort_cmd - Ask host to abort a running command.
727 * @scmd: SCSI cmd to abort from Lower Level.
728 *
729 * Notes:
730 * This function will not return until the user's completion function
731 * has been called. there is no timeout on this operation. if the
732 * author of the low-level driver wishes this operation to be timed,
733 * they can provide this facility themselves. helper functions in
734 * scsi_error.c can be supplied to make this easier to do.
735 **/
736static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
737{
738 if (!scmd->device->host->hostt->eh_abort_handler)
739 return FAILED;
740
741 /*
742 * scsi_done was called just after the command timed out and before
743 * we had a chance to process it. (db)
744 */
745 if (scmd->serial_number == 0)
746 return SUCCESS;
747 return scmd->device->host->hostt->eh_abort_handler(scmd);
748}
749
750/**
751 * scsi_eh_tur - Send TUR to device. 842 * scsi_eh_tur - Send TUR to device.
752 * @scmd: Scsi cmd to send TUR 843 * @scmd: Scsi cmd to send TUR
753 * 844 *
@@ -821,32 +912,6 @@ static int scsi_eh_abort_cmds(struct list_head *work_q,
821} 912}
822 913
823/** 914/**
824 * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev
825 * @scmd: SCSI cmd used to send BDR
826 *
827 * Notes:
828 * There is no timeout for this operation. if this operation is
829 * unreliable for a given host, then the host itself needs to put a
830 * timer on it, and set the host back to a consistent state prior to
831 * returning.
832 **/
833static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)
834{
835 int rtn;
836
837 if (!scmd->device->host->hostt->eh_device_reset_handler)
838 return FAILED;
839
840 rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd);
841 if (rtn == SUCCESS) {
842 scmd->device->was_reset = 1;
843 scmd->device->expecting_cc_ua = 1;
844 }
845
846 return rtn;
847}
848
849/**
850 * scsi_eh_try_stu - Send START_UNIT to device. 915 * scsi_eh_try_stu - Send START_UNIT to device.
851 * @scmd: Scsi cmd to send START_UNIT 916 * @scmd: Scsi cmd to send START_UNIT
852 * 917 *
@@ -977,64 +1042,6 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
977} 1042}
978 1043
979/** 1044/**
980 * scsi_try_bus_reset - ask host to perform a bus reset
981 * @scmd: SCSI cmd to send bus reset.
982 **/
983static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
984{
985 unsigned long flags;
986 int rtn;
987
988 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",
989 __FUNCTION__));
990
991 if (!scmd->device->host->hostt->eh_bus_reset_handler)
992 return FAILED;
993
994 rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd);
995
996 if (rtn == SUCCESS) {
997 if (!scmd->device->host->hostt->skip_settle_delay)
998 ssleep(BUS_RESET_SETTLE_TIME);
999 spin_lock_irqsave(scmd->device->host->host_lock, flags);
1000 scsi_report_bus_reset(scmd->device->host,
1001 scmd_channel(scmd));
1002 spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
1003 }
1004
1005 return rtn;
1006}
1007
1008/**
1009 * scsi_try_host_reset - ask host adapter to reset itself
1010 * @scmd: SCSI cmd to send hsot reset.
1011 **/
1012static int scsi_try_host_reset(struct scsi_cmnd *scmd)
1013{
1014 unsigned long flags;
1015 int rtn;
1016
1017 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",
1018 __FUNCTION__));
1019
1020 if (!scmd->device->host->hostt->eh_host_reset_handler)
1021 return FAILED;
1022
1023 rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd);
1024
1025 if (rtn == SUCCESS) {
1026 if (!scmd->device->host->hostt->skip_settle_delay)
1027 ssleep(HOST_RESET_SETTLE_TIME);
1028 spin_lock_irqsave(scmd->device->host->host_lock, flags);
1029 scsi_report_bus_reset(scmd->device->host,
1030 scmd_channel(scmd));
1031 spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
1032 }
1033
1034 return rtn;
1035}
1036
1037/**
1038 * scsi_eh_bus_reset - send a bus reset 1045 * scsi_eh_bus_reset - send a bus reset
1039 * @shost: scsi host being recovered. 1046 * @shost: scsi host being recovered.
1040 * @eh_done_q: list_head for processed commands. 1047 * @eh_done_q: list_head for processed commands.