diff options
author | Brian King <brking@linux.vnet.ibm.com> | 2007-01-30 18:51:17 -0500 |
---|---|---|
committer | James Bottomley <jejb@mulgrave.il.steeleye.com> | 2007-02-03 09:32:10 -0500 |
commit | 292148f8bb2b5d120440e046d24de07a739461aa (patch) | |
tree | b87e6805e4fd9d7c0306c3ea350049275156cb0f | |
parent | 214fbb75075efa677b614be79a2d62dd79785b4f (diff) |
[SCSI] scsi_error: Fix lost EH commands
If an EH command times out today, the LLDD's abort handler
will be called to abort the command. The handler's return value is
ignored and the abort is assumed to have succeeded, so a command whose
abort failed can still complete later, causing an oops. Improve the current
implementation by escalating all the way to host reset if
necessary in order to clean up the EH command.
Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
-rw-r--r-- | drivers/scsi/scsi_error.c | 239 |
1 files changed, 123 insertions, 116 deletions
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 2dce06a58c08..b8edcf5b5451 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c | |||
@@ -458,6 +458,128 @@ static void scsi_eh_done(struct scsi_cmnd *scmd) | |||
458 | } | 458 | } |
459 | 459 | ||
460 | /** | 460 | /** |
461 | * scsi_try_host_reset - ask host adapter to reset itself | ||
462 | * @scmd: SCSI cmd to send hsot reset. | ||
463 | **/ | ||
464 | static int scsi_try_host_reset(struct scsi_cmnd *scmd) | ||
465 | { | ||
466 | unsigned long flags; | ||
467 | int rtn; | ||
468 | |||
469 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n", | ||
470 | __FUNCTION__)); | ||
471 | |||
472 | if (!scmd->device->host->hostt->eh_host_reset_handler) | ||
473 | return FAILED; | ||
474 | |||
475 | rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd); | ||
476 | |||
477 | if (rtn == SUCCESS) { | ||
478 | if (!scmd->device->host->hostt->skip_settle_delay) | ||
479 | ssleep(HOST_RESET_SETTLE_TIME); | ||
480 | spin_lock_irqsave(scmd->device->host->host_lock, flags); | ||
481 | scsi_report_bus_reset(scmd->device->host, | ||
482 | scmd_channel(scmd)); | ||
483 | spin_unlock_irqrestore(scmd->device->host->host_lock, flags); | ||
484 | } | ||
485 | |||
486 | return rtn; | ||
487 | } | ||
488 | |||
489 | /** | ||
490 | * scsi_try_bus_reset - ask host to perform a bus reset | ||
491 | * @scmd: SCSI cmd to send bus reset. | ||
492 | **/ | ||
493 | static int scsi_try_bus_reset(struct scsi_cmnd *scmd) | ||
494 | { | ||
495 | unsigned long flags; | ||
496 | int rtn; | ||
497 | |||
498 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n", | ||
499 | __FUNCTION__)); | ||
500 | |||
501 | if (!scmd->device->host->hostt->eh_bus_reset_handler) | ||
502 | return FAILED; | ||
503 | |||
504 | rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd); | ||
505 | |||
506 | if (rtn == SUCCESS) { | ||
507 | if (!scmd->device->host->hostt->skip_settle_delay) | ||
508 | ssleep(BUS_RESET_SETTLE_TIME); | ||
509 | spin_lock_irqsave(scmd->device->host->host_lock, flags); | ||
510 | scsi_report_bus_reset(scmd->device->host, | ||
511 | scmd_channel(scmd)); | ||
512 | spin_unlock_irqrestore(scmd->device->host->host_lock, flags); | ||
513 | } | ||
514 | |||
515 | return rtn; | ||
516 | } | ||
517 | |||
518 | /** | ||
519 | * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev | ||
520 | * @scmd: SCSI cmd used to send BDR | ||
521 | * | ||
522 | * Notes: | ||
523 | * There is no timeout for this operation. if this operation is | ||
524 | * unreliable for a given host, then the host itself needs to put a | ||
525 | * timer on it, and set the host back to a consistent state prior to | ||
526 | * returning. | ||
527 | **/ | ||
528 | static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd) | ||
529 | { | ||
530 | int rtn; | ||
531 | |||
532 | if (!scmd->device->host->hostt->eh_device_reset_handler) | ||
533 | return FAILED; | ||
534 | |||
535 | rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd); | ||
536 | if (rtn == SUCCESS) { | ||
537 | scmd->device->was_reset = 1; | ||
538 | scmd->device->expecting_cc_ua = 1; | ||
539 | } | ||
540 | |||
541 | return rtn; | ||
542 | } | ||
543 | |||
544 | static int __scsi_try_to_abort_cmd(struct scsi_cmnd *scmd) | ||
545 | { | ||
546 | if (!scmd->device->host->hostt->eh_abort_handler) | ||
547 | return FAILED; | ||
548 | |||
549 | return scmd->device->host->hostt->eh_abort_handler(scmd); | ||
550 | } | ||
551 | |||
552 | /** | ||
553 | * scsi_try_to_abort_cmd - Ask host to abort a running command. | ||
554 | * @scmd: SCSI cmd to abort from Lower Level. | ||
555 | * | ||
556 | * Notes: | ||
557 | * This function will not return until the user's completion function | ||
558 | * has been called. there is no timeout on this operation. if the | ||
559 | * author of the low-level driver wishes this operation to be timed, | ||
560 | * they can provide this facility themselves. helper functions in | ||
561 | * scsi_error.c can be supplied to make this easier to do. | ||
562 | **/ | ||
563 | static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd) | ||
564 | { | ||
565 | /* | ||
566 | * scsi_done was called just after the command timed out and before | ||
567 | * we had a chance to process it. (db) | ||
568 | */ | ||
569 | if (scmd->serial_number == 0) | ||
570 | return SUCCESS; | ||
571 | return __scsi_try_to_abort_cmd(scmd); | ||
572 | } | ||
573 | |||
574 | static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd) | ||
575 | { | ||
576 | if (__scsi_try_to_abort_cmd(scmd) != SUCCESS) | ||
577 | if (scsi_try_bus_device_reset(scmd) != SUCCESS) | ||
578 | if (scsi_try_bus_reset(scmd) != SUCCESS) | ||
579 | scsi_try_host_reset(scmd); | ||
580 | } | ||
581 | |||
582 | /** | ||
461 | * scsi_send_eh_cmnd - submit a scsi command as part of error recory | 583 | * scsi_send_eh_cmnd - submit a scsi command as part of error recory |
462 | * @scmd: SCSI command structure to hijack | 584 | * @scmd: SCSI command structure to hijack |
463 | * @cmnd: CDB to send | 585 | * @cmnd: CDB to send |
@@ -584,13 +706,7 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd, | |||
584 | break; | 706 | break; |
585 | } | 707 | } |
586 | } else { | 708 | } else { |
587 | /* | 709 | scsi_abort_eh_cmnd(scmd); |
588 | * FIXME(eric) - we are not tracking whether we could | ||
589 | * abort a timed out command or not. not sure how | ||
590 | * we should treat them differently anyways. | ||
591 | */ | ||
592 | if (shost->hostt->eh_abort_handler) | ||
593 | shost->hostt->eh_abort_handler(scmd); | ||
594 | rtn = FAILED; | 710 | rtn = FAILED; |
595 | } | 711 | } |
596 | 712 | ||
@@ -723,31 +839,6 @@ int scsi_eh_get_sense(struct list_head *work_q, | |||
723 | EXPORT_SYMBOL_GPL(scsi_eh_get_sense); | 839 | EXPORT_SYMBOL_GPL(scsi_eh_get_sense); |
724 | 840 | ||
725 | /** | 841 | /** |
726 | * scsi_try_to_abort_cmd - Ask host to abort a running command. | ||
727 | * @scmd: SCSI cmd to abort from Lower Level. | ||
728 | * | ||
729 | * Notes: | ||
730 | * This function will not return until the user's completion function | ||
731 | * has been called. there is no timeout on this operation. if the | ||
732 | * author of the low-level driver wishes this operation to be timed, | ||
733 | * they can provide this facility themselves. helper functions in | ||
734 | * scsi_error.c can be supplied to make this easier to do. | ||
735 | **/ | ||
736 | static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd) | ||
737 | { | ||
738 | if (!scmd->device->host->hostt->eh_abort_handler) | ||
739 | return FAILED; | ||
740 | |||
741 | /* | ||
742 | * scsi_done was called just after the command timed out and before | ||
743 | * we had a chance to process it. (db) | ||
744 | */ | ||
745 | if (scmd->serial_number == 0) | ||
746 | return SUCCESS; | ||
747 | return scmd->device->host->hostt->eh_abort_handler(scmd); | ||
748 | } | ||
749 | |||
750 | /** | ||
751 | * scsi_eh_tur - Send TUR to device. | 842 | * scsi_eh_tur - Send TUR to device. |
752 | * @scmd: Scsi cmd to send TUR | 843 | * @scmd: Scsi cmd to send TUR |
753 | * | 844 | * |
@@ -821,32 +912,6 @@ static int scsi_eh_abort_cmds(struct list_head *work_q, | |||
821 | } | 912 | } |
822 | 913 | ||
823 | /** | 914 | /** |
824 | * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev | ||
825 | * @scmd: SCSI cmd used to send BDR | ||
826 | * | ||
827 | * Notes: | ||
828 | * There is no timeout for this operation. if this operation is | ||
829 | * unreliable for a given host, then the host itself needs to put a | ||
830 | * timer on it, and set the host back to a consistent state prior to | ||
831 | * returning. | ||
832 | **/ | ||
833 | static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd) | ||
834 | { | ||
835 | int rtn; | ||
836 | |||
837 | if (!scmd->device->host->hostt->eh_device_reset_handler) | ||
838 | return FAILED; | ||
839 | |||
840 | rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd); | ||
841 | if (rtn == SUCCESS) { | ||
842 | scmd->device->was_reset = 1; | ||
843 | scmd->device->expecting_cc_ua = 1; | ||
844 | } | ||
845 | |||
846 | return rtn; | ||
847 | } | ||
848 | |||
849 | /** | ||
850 | * scsi_eh_try_stu - Send START_UNIT to device. | 915 | * scsi_eh_try_stu - Send START_UNIT to device. |
851 | * @scmd: Scsi cmd to send START_UNIT | 916 | * @scmd: Scsi cmd to send START_UNIT |
852 | * | 917 | * |
@@ -977,64 +1042,6 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, | |||
977 | } | 1042 | } |
978 | 1043 | ||
979 | /** | 1044 | /** |
980 | * scsi_try_bus_reset - ask host to perform a bus reset | ||
981 | * @scmd: SCSI cmd to send bus reset. | ||
982 | **/ | ||
983 | static int scsi_try_bus_reset(struct scsi_cmnd *scmd) | ||
984 | { | ||
985 | unsigned long flags; | ||
986 | int rtn; | ||
987 | |||
988 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n", | ||
989 | __FUNCTION__)); | ||
990 | |||
991 | if (!scmd->device->host->hostt->eh_bus_reset_handler) | ||
992 | return FAILED; | ||
993 | |||
994 | rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd); | ||
995 | |||
996 | if (rtn == SUCCESS) { | ||
997 | if (!scmd->device->host->hostt->skip_settle_delay) | ||
998 | ssleep(BUS_RESET_SETTLE_TIME); | ||
999 | spin_lock_irqsave(scmd->device->host->host_lock, flags); | ||
1000 | scsi_report_bus_reset(scmd->device->host, | ||
1001 | scmd_channel(scmd)); | ||
1002 | spin_unlock_irqrestore(scmd->device->host->host_lock, flags); | ||
1003 | } | ||
1004 | |||
1005 | return rtn; | ||
1006 | } | ||
1007 | |||
1008 | /** | ||
1009 | * scsi_try_host_reset - ask host adapter to reset itself | ||
1010 | * @scmd: SCSI cmd to send hsot reset. | ||
1011 | **/ | ||
1012 | static int scsi_try_host_reset(struct scsi_cmnd *scmd) | ||
1013 | { | ||
1014 | unsigned long flags; | ||
1015 | int rtn; | ||
1016 | |||
1017 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n", | ||
1018 | __FUNCTION__)); | ||
1019 | |||
1020 | if (!scmd->device->host->hostt->eh_host_reset_handler) | ||
1021 | return FAILED; | ||
1022 | |||
1023 | rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd); | ||
1024 | |||
1025 | if (rtn == SUCCESS) { | ||
1026 | if (!scmd->device->host->hostt->skip_settle_delay) | ||
1027 | ssleep(HOST_RESET_SETTLE_TIME); | ||
1028 | spin_lock_irqsave(scmd->device->host->host_lock, flags); | ||
1029 | scsi_report_bus_reset(scmd->device->host, | ||
1030 | scmd_channel(scmd)); | ||
1031 | spin_unlock_irqrestore(scmd->device->host->host_lock, flags); | ||
1032 | } | ||
1033 | |||
1034 | return rtn; | ||
1035 | } | ||
1036 | |||
1037 | /** | ||
1038 | * scsi_eh_bus_reset - send a bus reset | 1045 | * scsi_eh_bus_reset - send a bus reset |
1039 | * @shost: scsi host being recovered. | 1046 | * @shost: scsi host being recovered. |
1040 | * @eh_done_q: list_head for processed commands. | 1047 | * @eh_done_q: list_head for processed commands. |