diff options
author | Tejun Heo <htejun@gmail.com> | 2006-05-15 07:58:22 -0400 |
---|---|---|
committer | Tejun Heo <htejun@gmail.com> | 2006-05-15 07:58:22 -0400 |
commit | 022bdb075b9e1f224088a0b268de56268d7bc5b6 (patch) | |
tree | 05878e28202f0a86bdcc32ae5d995db9f15042da | |
parent | f3e81b19aac23c0e8c55d5961324ef7de44c23bb (diff) |
[PATCH] libata-eh: implement new EH
Implement new EH. The exported interface is ata_do_eh() which is to
be called from ->error_handler and performs the following steps to
recover the failed port.
ata_eh_autopsy() : analyze SError/TF, determine the cause of failure
and required recovery actions and record it in
ap->eh_context
ata_eh_report() : report the failure to user
ata_eh_recover() : perform recovery actions described in ap->eh_context
ata_eh_finish() : finish failed qcs
LLDDs can customize error handling by modifying eh_context before
calling ata_do_eh() or, if necessary, by doing so in between the major
steps, calling each step explicitly.
Signed-off-by: Tejun Heo <htejun@gmail.com>
-rw-r--r-- | drivers/scsi/libata-core.c | 1 | ||||
-rw-r--r-- | drivers/scsi/libata-eh.c | 775 | ||||
-rw-r--r-- | include/linux/libata.h | 5 |
3 files changed, 781 insertions, 0 deletions
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 4def48ed6f46..ddc47097d37e 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c | |||
@@ -5440,3 +5440,4 @@ EXPORT_SYMBOL_GPL(ata_eh_freeze_port); | |||
5440 | EXPORT_SYMBOL_GPL(ata_eh_thaw_port); | 5440 | EXPORT_SYMBOL_GPL(ata_eh_thaw_port); |
5441 | EXPORT_SYMBOL_GPL(ata_eh_qc_complete); | 5441 | EXPORT_SYMBOL_GPL(ata_eh_qc_complete); |
5442 | EXPORT_SYMBOL_GPL(ata_eh_qc_retry); | 5442 | EXPORT_SYMBOL_GPL(ata_eh_qc_retry); |
5443 | EXPORT_SYMBOL_GPL(ata_do_eh); | ||
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c index 1968f2d140f3..cd133f83e595 100644 --- a/drivers/scsi/libata-eh.c +++ b/drivers/scsi/libata-eh.c | |||
@@ -626,3 +626,778 @@ void ata_eh_qc_retry(struct ata_queued_cmd *qc) | |||
626 | scmd->retries--; | 626 | scmd->retries--; |
627 | __ata_eh_qc_complete(qc); | 627 | __ata_eh_qc_complete(qc); |
628 | } | 628 | } |
629 | |||
630 | /** | ||
631 | * ata_eh_about_to_do - about to perform eh_action | ||
632 | * @ap: target ATA port | ||
633 | * @action: action about to be performed | ||
634 | * | ||
635 | * Called just before performing EH actions to clear related bits | ||
636 | * in @ap->eh_info such that eh actions are not unnecessarily | ||
637 | * repeated. | ||
638 | * | ||
639 | * LOCKING: | ||
640 | * None. | ||
641 | */ | ||
642 | static void ata_eh_about_to_do(struct ata_port *ap, unsigned int action) | ||
643 | { | ||
644 | unsigned long flags; | ||
645 | |||
646 | spin_lock_irqsave(&ap->host_set->lock, flags); | ||
647 | ap->eh_info.action &= ~action; | ||
648 | ap->flags |= ATA_FLAG_RECOVERED; | ||
649 | spin_unlock_irqrestore(&ap->host_set->lock, flags); | ||
650 | } | ||
651 | |||
652 | /** | ||
653 | * ata_err_string - convert err_mask to descriptive string | ||
654 | * @err_mask: error mask to convert to string | ||
655 | * | ||
656 | * Convert @err_mask to descriptive string. Errors are | ||
657 | * prioritized according to severity and only the most severe | ||
658 | * error is reported. | ||
659 | * | ||
660 | * LOCKING: | ||
661 | * None. | ||
662 | * | ||
663 | * RETURNS: | ||
664 | * Descriptive string for @err_mask | ||
665 | */ | ||
666 | static const char * ata_err_string(unsigned int err_mask) | ||
667 | { | ||
668 | if (err_mask & AC_ERR_HOST_BUS) | ||
669 | return "host bus error"; | ||
670 | if (err_mask & AC_ERR_ATA_BUS) | ||
671 | return "ATA bus error"; | ||
672 | if (err_mask & AC_ERR_TIMEOUT) | ||
673 | return "timeout"; | ||
674 | if (err_mask & AC_ERR_HSM) | ||
675 | return "HSM violation"; | ||
676 | if (err_mask & AC_ERR_SYSTEM) | ||
677 | return "internal error"; | ||
678 | if (err_mask & AC_ERR_MEDIA) | ||
679 | return "media error"; | ||
680 | if (err_mask & AC_ERR_INVALID) | ||
681 | return "invalid argument"; | ||
682 | if (err_mask & AC_ERR_DEV) | ||
683 | return "device error"; | ||
684 | return "unknown error"; | ||
685 | } | ||
686 | |||
687 | /** | ||
688 | * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE | ||
689 | * @dev: device to perform REQUEST_SENSE to | ||
690 | * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) | ||
691 | * | ||
692 | * Perform ATAPI REQUEST_SENSE after the device reported CHECK | ||
693 | * SENSE. This function is EH helper. | ||
694 | * | ||
695 | * LOCKING: | ||
696 | * Kernel thread context (may sleep). | ||
697 | * | ||
698 | * RETURNS: | ||
699 | * 0 on success, AC_ERR_* mask on failure | ||
700 | */ | ||
701 | static unsigned int atapi_eh_request_sense(struct ata_device *dev, | ||
702 | unsigned char *sense_buf) | ||
703 | { | ||
704 | struct ata_port *ap = dev->ap; | ||
705 | struct ata_taskfile tf; | ||
706 | u8 cdb[ATAPI_CDB_LEN]; | ||
707 | |||
708 | DPRINTK("ATAPI request sense\n"); | ||
709 | |||
710 | ata_tf_init(dev, &tf); | ||
711 | |||
712 | /* FIXME: is this needed? */ | ||
713 | memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); | ||
714 | |||
715 | /* XXX: why tf_read here? */ | ||
716 | ap->ops->tf_read(ap, &tf); | ||
717 | |||
718 | /* fill these in, for the case where they are -not- overwritten */ | ||
719 | sense_buf[0] = 0x70; | ||
720 | sense_buf[2] = tf.feature >> 4; | ||
721 | |||
722 | memset(cdb, 0, ATAPI_CDB_LEN); | ||
723 | cdb[0] = REQUEST_SENSE; | ||
724 | cdb[4] = SCSI_SENSE_BUFFERSIZE; | ||
725 | |||
726 | tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; | ||
727 | tf.command = ATA_CMD_PACKET; | ||
728 | |||
729 | /* is it pointless to prefer PIO for "safety reasons"? */ | ||
730 | if (ap->flags & ATA_FLAG_PIO_DMA) { | ||
731 | tf.protocol = ATA_PROT_ATAPI_DMA; | ||
732 | tf.feature |= ATAPI_PKT_DMA; | ||
733 | } else { | ||
734 | tf.protocol = ATA_PROT_ATAPI; | ||
735 | tf.lbam = (8 * 1024) & 0xff; | ||
736 | tf.lbah = (8 * 1024) >> 8; | ||
737 | } | ||
738 | |||
739 | return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, | ||
740 | sense_buf, SCSI_SENSE_BUFFERSIZE); | ||
741 | } | ||
742 | |||
743 | /** | ||
744 | * ata_eh_analyze_serror - analyze SError for a failed port | ||
745 | * @ap: ATA port to analyze SError for | ||
746 | * | ||
747 | * Analyze SError if available and further determine cause of | ||
748 | * failure. | ||
749 | * | ||
750 | * LOCKING: | ||
751 | * None. | ||
752 | */ | ||
753 | static void ata_eh_analyze_serror(struct ata_port *ap) | ||
754 | { | ||
755 | struct ata_eh_context *ehc = &ap->eh_context; | ||
756 | u32 serror = ehc->i.serror; | ||
757 | unsigned int err_mask = 0, action = 0; | ||
758 | |||
759 | if (serror & SERR_PERSISTENT) { | ||
760 | err_mask |= AC_ERR_ATA_BUS; | ||
761 | action |= ATA_EH_HARDRESET; | ||
762 | } | ||
763 | if (serror & | ||
764 | (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) { | ||
765 | err_mask |= AC_ERR_ATA_BUS; | ||
766 | action |= ATA_EH_SOFTRESET; | ||
767 | } | ||
768 | if (serror & SERR_PROTOCOL) { | ||
769 | err_mask |= AC_ERR_HSM; | ||
770 | action |= ATA_EH_SOFTRESET; | ||
771 | } | ||
772 | if (serror & SERR_INTERNAL) { | ||
773 | err_mask |= AC_ERR_SYSTEM; | ||
774 | action |= ATA_EH_SOFTRESET; | ||
775 | } | ||
776 | if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG)) { | ||
777 | err_mask |= AC_ERR_ATA_BUS; | ||
778 | action |= ATA_EH_HARDRESET; | ||
779 | } | ||
780 | |||
781 | ehc->i.err_mask |= err_mask; | ||
782 | ehc->i.action |= action; | ||
783 | } | ||
784 | |||
785 | /** | ||
786 | * ata_eh_analyze_tf - analyze taskfile of a failed qc | ||
787 | * @qc: qc to analyze | ||
788 | * @tf: Taskfile registers to analyze | ||
789 | * | ||
790 | * Analyze taskfile of @qc and further determine cause of | ||
791 | * failure. This function also requests ATAPI sense data if | ||
792 | * avaliable. | ||
793 | * | ||
794 | * LOCKING: | ||
795 | * Kernel thread context (may sleep). | ||
796 | * | ||
797 | * RETURNS: | ||
798 | * Determined recovery action | ||
799 | */ | ||
800 | static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, | ||
801 | const struct ata_taskfile *tf) | ||
802 | { | ||
803 | unsigned int tmp, action = 0; | ||
804 | u8 stat = tf->command, err = tf->feature; | ||
805 | |||
806 | if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { | ||
807 | qc->err_mask |= AC_ERR_HSM; | ||
808 | return ATA_EH_SOFTRESET; | ||
809 | } | ||
810 | |||
811 | if (!(qc->err_mask & AC_ERR_DEV)) | ||
812 | return 0; | ||
813 | |||
814 | switch (qc->dev->class) { | ||
815 | case ATA_DEV_ATA: | ||
816 | if (err & ATA_ICRC) | ||
817 | qc->err_mask |= AC_ERR_ATA_BUS; | ||
818 | if (err & ATA_UNC) | ||
819 | qc->err_mask |= AC_ERR_MEDIA; | ||
820 | if (err & ATA_IDNF) | ||
821 | qc->err_mask |= AC_ERR_INVALID; | ||
822 | break; | ||
823 | |||
824 | case ATA_DEV_ATAPI: | ||
825 | tmp = atapi_eh_request_sense(qc->dev, | ||
826 | qc->scsicmd->sense_buffer); | ||
827 | if (!tmp) { | ||
828 | /* ATA_QCFLAG_SENSE_VALID is used to tell | ||
829 | * atapi_qc_complete() that sense data is | ||
830 | * already valid. | ||
831 | * | ||
832 | * TODO: interpret sense data and set | ||
833 | * appropriate err_mask. | ||
834 | */ | ||
835 | qc->flags |= ATA_QCFLAG_SENSE_VALID; | ||
836 | } else | ||
837 | qc->err_mask |= tmp; | ||
838 | } | ||
839 | |||
840 | if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) | ||
841 | action |= ATA_EH_SOFTRESET; | ||
842 | |||
843 | return action; | ||
844 | } | ||
845 | |||
846 | static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent) | ||
847 | { | ||
848 | if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT)) | ||
849 | return 1; | ||
850 | |||
851 | if (ent->is_io) { | ||
852 | if (ent->err_mask & AC_ERR_HSM) | ||
853 | return 1; | ||
854 | if ((ent->err_mask & | ||
855 | (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) | ||
856 | return 2; | ||
857 | } | ||
858 | |||
859 | return 0; | ||
860 | } | ||
861 | |||
862 | struct speed_down_needed_arg { | ||
863 | u64 since; | ||
864 | int nr_errors[3]; | ||
865 | }; | ||
866 | |||
867 | static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg) | ||
868 | { | ||
869 | struct speed_down_needed_arg *arg = void_arg; | ||
870 | |||
871 | if (ent->timestamp < arg->since) | ||
872 | return -1; | ||
873 | |||
874 | arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++; | ||
875 | return 0; | ||
876 | } | ||
877 | |||
878 | /** | ||
879 | * ata_eh_speed_down_needed - Determine wheter speed down is necessary | ||
880 | * @dev: Device of interest | ||
881 | * | ||
882 | * This function examines error ring of @dev and determines | ||
883 | * whether speed down is necessary. Speed down is necessary if | ||
884 | * there have been more than 3 of Cat-1 errors or 10 of Cat-2 | ||
885 | * errors during last 15 minutes. | ||
886 | * | ||
887 | * Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM | ||
888 | * violation for known supported commands. | ||
889 | * | ||
890 | * Cat-2 errors are unclassified DEV error for known supported | ||
891 | * command. | ||
892 | * | ||
893 | * LOCKING: | ||
894 | * Inherited from caller. | ||
895 | * | ||
896 | * RETURNS: | ||
897 | * 1 if speed down is necessary, 0 otherwise | ||
898 | */ | ||
899 | static int ata_eh_speed_down_needed(struct ata_device *dev) | ||
900 | { | ||
901 | const u64 interval = 15LLU * 60 * HZ; | ||
902 | static const int err_limits[3] = { -1, 3, 10 }; | ||
903 | struct speed_down_needed_arg arg; | ||
904 | struct ata_ering_entry *ent; | ||
905 | int err_cat; | ||
906 | u64 j64; | ||
907 | |||
908 | ent = ata_ering_top(&dev->ering); | ||
909 | if (!ent) | ||
910 | return 0; | ||
911 | |||
912 | err_cat = ata_eh_categorize_ering_entry(ent); | ||
913 | if (err_cat == 0) | ||
914 | return 0; | ||
915 | |||
916 | memset(&arg, 0, sizeof(arg)); | ||
917 | |||
918 | j64 = get_jiffies_64(); | ||
919 | if (j64 >= interval) | ||
920 | arg.since = j64 - interval; | ||
921 | else | ||
922 | arg.since = 0; | ||
923 | |||
924 | ata_ering_map(&dev->ering, speed_down_needed_cb, &arg); | ||
925 | |||
926 | return arg.nr_errors[err_cat] > err_limits[err_cat]; | ||
927 | } | ||
928 | |||
929 | /** | ||
930 | * ata_eh_speed_down - record error and speed down if necessary | ||
931 | * @dev: Failed device | ||
932 | * @is_io: Did the device fail during normal IO? | ||
933 | * @err_mask: err_mask of the error | ||
934 | * | ||
935 | * Record error and examine error history to determine whether | ||
936 | * adjusting transmission speed is necessary. It also sets | ||
937 | * transmission limits appropriately if such adjustment is | ||
938 | * necessary. | ||
939 | * | ||
940 | * LOCKING: | ||
941 | * Kernel thread context (may sleep). | ||
942 | * | ||
943 | * RETURNS: | ||
944 | * 0 on success, -errno otherwise | ||
945 | */ | ||
946 | static int ata_eh_speed_down(struct ata_device *dev, int is_io, | ||
947 | unsigned int err_mask) | ||
948 | { | ||
949 | if (!err_mask) | ||
950 | return 0; | ||
951 | |||
952 | /* record error and determine whether speed down is necessary */ | ||
953 | ata_ering_record(&dev->ering, is_io, err_mask); | ||
954 | |||
955 | if (!ata_eh_speed_down_needed(dev)) | ||
956 | return 0; | ||
957 | |||
958 | /* speed down SATA link speed if possible */ | ||
959 | if (sata_down_spd_limit(dev->ap) == 0) | ||
960 | return ATA_EH_HARDRESET; | ||
961 | |||
962 | /* lower transfer mode */ | ||
963 | if (ata_down_xfermask_limit(dev, 0) == 0) | ||
964 | return ATA_EH_SOFTRESET; | ||
965 | |||
966 | ata_dev_printk(dev, KERN_ERR, | ||
967 | "speed down requested but no transfer mode left\n"); | ||
968 | return 0; | ||
969 | } | ||
970 | |||
971 | /** | ||
972 | * ata_eh_autopsy - analyze error and determine recovery action | ||
973 | * @ap: ATA port to perform autopsy on | ||
974 | * | ||
975 | * Analyze why @ap failed and determine which recovery action is | ||
976 | * needed. This function also sets more detailed AC_ERR_* values | ||
977 | * and fills sense data for ATAPI CHECK SENSE. | ||
978 | * | ||
979 | * LOCKING: | ||
980 | * Kernel thread context (may sleep). | ||
981 | */ | ||
982 | static void ata_eh_autopsy(struct ata_port *ap) | ||
983 | { | ||
984 | struct ata_eh_context *ehc = &ap->eh_context; | ||
985 | unsigned int action = ehc->i.action; | ||
986 | struct ata_device *failed_dev = NULL; | ||
987 | unsigned int all_err_mask = 0; | ||
988 | int tag, is_io = 0; | ||
989 | u32 serror; | ||
990 | int rc; | ||
991 | |||
992 | DPRINTK("ENTER\n"); | ||
993 | |||
994 | /* obtain and analyze SError */ | ||
995 | rc = sata_scr_read(ap, SCR_ERROR, &serror); | ||
996 | if (rc == 0) { | ||
997 | ehc->i.serror |= serror; | ||
998 | ata_eh_analyze_serror(ap); | ||
999 | } else if (rc != -EOPNOTSUPP) | ||
1000 | action |= ATA_EH_HARDRESET; | ||
1001 | |||
1002 | /* any real error trumps AC_ERR_OTHER */ | ||
1003 | if (ehc->i.err_mask & ~AC_ERR_OTHER) | ||
1004 | ehc->i.err_mask &= ~AC_ERR_OTHER; | ||
1005 | |||
1006 | all_err_mask |= ehc->i.err_mask; | ||
1007 | |||
1008 | for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { | ||
1009 | struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); | ||
1010 | |||
1011 | if (!(qc->flags & ATA_QCFLAG_FAILED)) | ||
1012 | continue; | ||
1013 | |||
1014 | /* inherit upper level err_mask */ | ||
1015 | qc->err_mask |= ehc->i.err_mask; | ||
1016 | |||
1017 | if (qc->err_mask & AC_ERR_TIMEOUT) | ||
1018 | action |= ATA_EH_SOFTRESET; | ||
1019 | |||
1020 | /* analyze TF */ | ||
1021 | action |= ata_eh_analyze_tf(qc, &qc->result_tf); | ||
1022 | |||
1023 | /* DEV errors are probably spurious in case of ATA_BUS error */ | ||
1024 | if (qc->err_mask & AC_ERR_ATA_BUS) | ||
1025 | qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | | ||
1026 | AC_ERR_INVALID); | ||
1027 | |||
1028 | /* any real error trumps unknown error */ | ||
1029 | if (qc->err_mask & ~AC_ERR_OTHER) | ||
1030 | qc->err_mask &= ~AC_ERR_OTHER; | ||
1031 | |||
1032 | /* SENSE_VALID trumps dev/unknown error and revalidation */ | ||
1033 | if (qc->flags & ATA_QCFLAG_SENSE_VALID) { | ||
1034 | qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); | ||
1035 | action &= ~ATA_EH_REVALIDATE; | ||
1036 | } | ||
1037 | |||
1038 | /* accumulate error info */ | ||
1039 | failed_dev = qc->dev; | ||
1040 | all_err_mask |= qc->err_mask; | ||
1041 | if (qc->flags & ATA_QCFLAG_IO) | ||
1042 | is_io = 1; | ||
1043 | } | ||
1044 | |||
1045 | /* speed down iff command was in progress */ | ||
1046 | if (failed_dev) | ||
1047 | action |= ata_eh_speed_down(failed_dev, is_io, all_err_mask); | ||
1048 | |||
1049 | if (all_err_mask) | ||
1050 | action |= ATA_EH_REVALIDATE; | ||
1051 | |||
1052 | ehc->i.dev = failed_dev; | ||
1053 | ehc->i.action = action; | ||
1054 | |||
1055 | DPRINTK("EXIT\n"); | ||
1056 | } | ||
1057 | |||
1058 | /** | ||
1059 | * ata_eh_report - report error handling to user | ||
1060 | * @ap: ATA port EH is going on | ||
1061 | * | ||
1062 | * Report EH to user. | ||
1063 | * | ||
1064 | * LOCKING: | ||
1065 | * None. | ||
1066 | */ | ||
1067 | static void ata_eh_report(struct ata_port *ap) | ||
1068 | { | ||
1069 | struct ata_eh_context *ehc = &ap->eh_context; | ||
1070 | const char *frozen, *desc; | ||
1071 | int tag, nr_failed = 0; | ||
1072 | |||
1073 | desc = NULL; | ||
1074 | if (ehc->i.desc[0] != '\0') | ||
1075 | desc = ehc->i.desc; | ||
1076 | |||
1077 | for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { | ||
1078 | struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); | ||
1079 | |||
1080 | if (!(qc->flags & ATA_QCFLAG_FAILED)) | ||
1081 | continue; | ||
1082 | if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) | ||
1083 | continue; | ||
1084 | |||
1085 | nr_failed++; | ||
1086 | } | ||
1087 | |||
1088 | if (!nr_failed && !ehc->i.err_mask) | ||
1089 | return; | ||
1090 | |||
1091 | frozen = ""; | ||
1092 | if (ap->flags & ATA_FLAG_FROZEN) | ||
1093 | frozen = " frozen"; | ||
1094 | |||
1095 | if (ehc->i.dev) { | ||
1096 | ata_dev_printk(ehc->i.dev, KERN_ERR, | ||
1097 | "exception Emask 0x%x SErr 0x%x action 0x%x%s\n", | ||
1098 | ehc->i.err_mask, ehc->i.serror, ehc->i.action, | ||
1099 | frozen); | ||
1100 | if (desc) | ||
1101 | ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc); | ||
1102 | } else { | ||
1103 | ata_port_printk(ap, KERN_ERR, | ||
1104 | "exception Emask 0x%x SErr 0x%x action 0x%x%s\n", | ||
1105 | ehc->i.err_mask, ehc->i.serror, ehc->i.action, | ||
1106 | frozen); | ||
1107 | if (desc) | ||
1108 | ata_port_printk(ap, KERN_ERR, "(%s)\n", desc); | ||
1109 | } | ||
1110 | |||
1111 | for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { | ||
1112 | struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); | ||
1113 | |||
1114 | if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask) | ||
1115 | continue; | ||
1116 | |||
1117 | ata_dev_printk(qc->dev, KERN_ERR, "tag %d cmd 0x%x " | ||
1118 | "Emask 0x%x stat 0x%x err 0x%x (%s)\n", | ||
1119 | qc->tag, qc->tf.command, qc->err_mask, | ||
1120 | qc->result_tf.command, qc->result_tf.feature, | ||
1121 | ata_err_string(qc->err_mask)); | ||
1122 | } | ||
1123 | } | ||
1124 | |||
1125 | static int ata_eh_reset(struct ata_port *ap, ata_reset_fn_t softreset, | ||
1126 | ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) | ||
1127 | { | ||
1128 | struct ata_eh_context *ehc = &ap->eh_context; | ||
1129 | unsigned int classes[ATA_MAX_DEVICES]; | ||
1130 | int tries = ATA_EH_RESET_TRIES; | ||
1131 | ata_reset_fn_t reset; | ||
1132 | int rc; | ||
1133 | |||
1134 | if (softreset && (!hardreset || (!sata_set_spd_needed(ap) && | ||
1135 | !(ehc->i.action & ATA_EH_HARDRESET)))) | ||
1136 | reset = softreset; | ||
1137 | else | ||
1138 | reset = hardreset; | ||
1139 | |||
1140 | retry: | ||
1141 | ata_port_printk(ap, KERN_INFO, "%s resetting port\n", | ||
1142 | reset == softreset ? "soft" : "hard"); | ||
1143 | |||
1144 | /* reset */ | ||
1145 | ata_eh_about_to_do(ap, ATA_EH_RESET_MASK); | ||
1146 | ehc->i.flags |= ATA_EHI_DID_RESET; | ||
1147 | |||
1148 | rc = ata_do_reset(ap, reset, classes); | ||
1149 | |||
1150 | if (rc && --tries) { | ||
1151 | ata_port_printk(ap, KERN_WARNING, | ||
1152 | "%sreset failed, retrying in 5 secs\n", | ||
1153 | reset == softreset ? "soft" : "hard"); | ||
1154 | ssleep(5); | ||
1155 | |||
1156 | if (reset == hardreset) | ||
1157 | sata_down_spd_limit(ap); | ||
1158 | if (hardreset) | ||
1159 | reset = hardreset; | ||
1160 | goto retry; | ||
1161 | } | ||
1162 | |||
1163 | if (rc == 0) { | ||
1164 | if (postreset) | ||
1165 | postreset(ap, classes); | ||
1166 | |||
1167 | /* reset successful, schedule revalidation */ | ||
1168 | ehc->i.dev = NULL; | ||
1169 | ehc->i.action &= ~ATA_EH_RESET_MASK; | ||
1170 | ehc->i.action |= ATA_EH_REVALIDATE; | ||
1171 | } | ||
1172 | |||
1173 | return rc; | ||
1174 | } | ||
1175 | |||
1176 | static int ata_eh_revalidate(struct ata_port *ap, | ||
1177 | struct ata_device **r_failed_dev) | ||
1178 | { | ||
1179 | struct ata_eh_context *ehc = &ap->eh_context; | ||
1180 | struct ata_device *dev; | ||
1181 | int i, rc = 0; | ||
1182 | |||
1183 | DPRINTK("ENTER\n"); | ||
1184 | |||
1185 | for (i = 0; i < ATA_MAX_DEVICES; i++) { | ||
1186 | dev = &ap->device[i]; | ||
1187 | |||
1188 | if (ehc->i.action & ATA_EH_REVALIDATE && ata_dev_enabled(dev) && | ||
1189 | (!ehc->i.dev || ehc->i.dev == dev)) { | ||
1190 | if (ata_port_offline(ap)) { | ||
1191 | rc = -EIO; | ||
1192 | break; | ||
1193 | } | ||
1194 | |||
1195 | ata_eh_about_to_do(ap, ATA_EH_REVALIDATE); | ||
1196 | rc = ata_dev_revalidate(dev, | ||
1197 | ehc->i.flags & ATA_EHI_DID_RESET); | ||
1198 | if (rc) | ||
1199 | break; | ||
1200 | |||
1201 | ehc->i.action &= ~ATA_EH_REVALIDATE; | ||
1202 | } | ||
1203 | } | ||
1204 | |||
1205 | if (rc) | ||
1206 | *r_failed_dev = dev; | ||
1207 | |||
1208 | DPRINTK("EXIT\n"); | ||
1209 | return rc; | ||
1210 | } | ||
1211 | |||
1212 | static int ata_port_nr_enabled(struct ata_port *ap) | ||
1213 | { | ||
1214 | int i, cnt = 0; | ||
1215 | |||
1216 | for (i = 0; i < ATA_MAX_DEVICES; i++) | ||
1217 | if (ata_dev_enabled(&ap->device[i])) | ||
1218 | cnt++; | ||
1219 | return cnt; | ||
1220 | } | ||
1221 | |||
1222 | /** | ||
1223 | * ata_eh_recover - recover host port after error | ||
1224 | * @ap: host port to recover | ||
1225 | * @softreset: softreset method (can be NULL) | ||
1226 | * @hardreset: hardreset method (can be NULL) | ||
1227 | * @postreset: postreset method (can be NULL) | ||
1228 | * | ||
1229 | * This is the alpha and omega, eum and yang, heart and soul of | ||
1230 | * libata exception handling. On entry, actions required to | ||
1231 | * recover each devices are recorded in eh_context. This | ||
1232 | * function executes all the operations with appropriate retrials | ||
1233 | * and fallbacks to resurrect failed devices. | ||
1234 | * | ||
1235 | * LOCKING: | ||
1236 | * Kernel thread context (may sleep). | ||
1237 | * | ||
1238 | * RETURNS: | ||
1239 | * 0 on success, -errno on failure. | ||
1240 | */ | ||
1241 | static int ata_eh_recover(struct ata_port *ap, ata_reset_fn_t softreset, | ||
1242 | ata_reset_fn_t hardreset, | ||
1243 | ata_postreset_fn_t postreset) | ||
1244 | { | ||
1245 | struct ata_eh_context *ehc = &ap->eh_context; | ||
1246 | struct ata_device *dev; | ||
1247 | int down_xfermask, i, rc; | ||
1248 | |||
1249 | DPRINTK("ENTER\n"); | ||
1250 | |||
1251 | /* prep for recovery */ | ||
1252 | for (i = 0; i < ATA_MAX_DEVICES; i++) { | ||
1253 | dev = &ap->device[i]; | ||
1254 | |||
1255 | ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; | ||
1256 | } | ||
1257 | |||
1258 | retry: | ||
1259 | down_xfermask = 0; | ||
1260 | rc = 0; | ||
1261 | |||
1262 | /* skip EH if possible. */ | ||
1263 | if (!ata_port_nr_enabled(ap) && !(ap->flags & ATA_FLAG_FROZEN)) | ||
1264 | ehc->i.action = 0; | ||
1265 | |||
1266 | /* reset */ | ||
1267 | if (ehc->i.action & ATA_EH_RESET_MASK) { | ||
1268 | ata_eh_freeze_port(ap); | ||
1269 | |||
1270 | rc = ata_eh_reset(ap, softreset, hardreset, postreset); | ||
1271 | if (rc) { | ||
1272 | ata_port_printk(ap, KERN_ERR, | ||
1273 | "reset failed, giving up\n"); | ||
1274 | goto out; | ||
1275 | } | ||
1276 | |||
1277 | ata_eh_thaw_port(ap); | ||
1278 | } | ||
1279 | |||
1280 | /* revalidate existing devices */ | ||
1281 | rc = ata_eh_revalidate(ap, &dev); | ||
1282 | if (rc) | ||
1283 | goto dev_fail; | ||
1284 | |||
1285 | /* configure transfer mode if the port has been reset */ | ||
1286 | if (ehc->i.flags & ATA_EHI_DID_RESET) { | ||
1287 | rc = ata_set_mode(ap, &dev); | ||
1288 | if (rc) { | ||
1289 | down_xfermask = 1; | ||
1290 | goto dev_fail; | ||
1291 | } | ||
1292 | } | ||
1293 | |||
1294 | goto out; | ||
1295 | |||
1296 | dev_fail: | ||
1297 | switch (rc) { | ||
1298 | case -ENODEV: | ||
1299 | case -EINVAL: | ||
1300 | ehc->tries[dev->devno] = 0; | ||
1301 | break; | ||
1302 | case -EIO: | ||
1303 | sata_down_spd_limit(ap); | ||
1304 | default: | ||
1305 | ehc->tries[dev->devno]--; | ||
1306 | if (down_xfermask && | ||
1307 | ata_down_xfermask_limit(dev, ehc->tries[dev->devno] == 1)) | ||
1308 | ehc->tries[dev->devno] = 0; | ||
1309 | } | ||
1310 | |||
1311 | /* disable device if it has used up all its chances */ | ||
1312 | if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) | ||
1313 | ata_dev_disable(dev); | ||
1314 | |||
1315 | /* soft didn't work? be haaaaard */ | ||
1316 | if (ehc->i.flags & ATA_EHI_DID_RESET) | ||
1317 | ehc->i.action |= ATA_EH_HARDRESET; | ||
1318 | else | ||
1319 | ehc->i.action |= ATA_EH_SOFTRESET; | ||
1320 | |||
1321 | if (ata_port_nr_enabled(ap)) { | ||
1322 | ata_port_printk(ap, KERN_WARNING, "failed to recover some " | ||
1323 | "devices, retrying in 5 secs\n"); | ||
1324 | ssleep(5); | ||
1325 | } else { | ||
1326 | /* no device left, repeat fast */ | ||
1327 | msleep(500); | ||
1328 | } | ||
1329 | |||
1330 | goto retry; | ||
1331 | |||
1332 | out: | ||
1333 | if (rc) { | ||
1334 | for (i = 0; i < ATA_MAX_DEVICES; i++) | ||
1335 | ata_dev_disable(&ap->device[i]); | ||
1336 | } | ||
1337 | |||
1338 | DPRINTK("EXIT, rc=%d\n", rc); | ||
1339 | return rc; | ||
1340 | } | ||
1341 | |||
1342 | /** | ||
1343 | * ata_eh_finish - finish up EH | ||
1344 | * @ap: host port to finish EH for | ||
1345 | * | ||
1346 | * Recovery is complete. Clean up EH states and retry or finish | ||
1347 | * failed qcs. | ||
1348 | * | ||
1349 | * LOCKING: | ||
1350 | * None. | ||
1351 | */ | ||
1352 | static void ata_eh_finish(struct ata_port *ap) | ||
1353 | { | ||
1354 | int tag; | ||
1355 | |||
1356 | /* retry or finish qcs */ | ||
1357 | for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { | ||
1358 | struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); | ||
1359 | |||
1360 | if (!(qc->flags & ATA_QCFLAG_FAILED)) | ||
1361 | continue; | ||
1362 | |||
1363 | if (qc->err_mask) { | ||
1364 | /* FIXME: Once EH migration is complete, | ||
1365 | * generate sense data in this function, | ||
1366 | * considering both err_mask and tf. | ||
1367 | */ | ||
1368 | if (qc->err_mask & AC_ERR_INVALID) | ||
1369 | ata_eh_qc_complete(qc); | ||
1370 | else | ||
1371 | ata_eh_qc_retry(qc); | ||
1372 | } else { | ||
1373 | if (qc->flags & ATA_QCFLAG_SENSE_VALID) { | ||
1374 | ata_eh_qc_complete(qc); | ||
1375 | } else { | ||
1376 | /* feed zero TF to sense generation */ | ||
1377 | memset(&qc->result_tf, 0, sizeof(qc->result_tf)); | ||
1378 | ata_eh_qc_retry(qc); | ||
1379 | } | ||
1380 | } | ||
1381 | } | ||
1382 | } | ||
1383 | |||
1384 | /** | ||
1385 | * ata_do_eh - do standard error handling | ||
1386 | * @ap: host port to handle error for | ||
1387 | * @softreset: softreset method (can be NULL) | ||
1388 | * @hardreset: hardreset method (can be NULL) | ||
1389 | * @postreset: postreset method (can be NULL) | ||
1390 | * | ||
1391 | * Perform standard error handling sequence. | ||
1392 | * | ||
1393 | * LOCKING: | ||
1394 | * Kernel thread context (may sleep). | ||
1395 | */ | ||
1396 | void ata_do_eh(struct ata_port *ap, ata_reset_fn_t softreset, | ||
1397 | ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) | ||
1398 | { | ||
1399 | ata_eh_autopsy(ap); | ||
1400 | ata_eh_report(ap); | ||
1401 | ata_eh_recover(ap, softreset, hardreset, postreset); | ||
1402 | ata_eh_finish(ap); | ||
1403 | } | ||
diff --git a/include/linux/libata.h b/include/linux/libata.h index 298f9918e375..9fe46073cf8c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h | |||
@@ -247,6 +247,8 @@ enum { | |||
247 | 247 | ||
248 | /* how hard are we gonna try to probe/recover devices */ | 248 | /* how hard are we gonna try to probe/recover devices */ |
249 | ATA_PROBE_MAX_TRIES = 3, | 249 | ATA_PROBE_MAX_TRIES = 3, |
250 | ATA_EH_RESET_TRIES = 3, | ||
251 | ATA_EH_DEV_TRIES = 3, | ||
250 | }; | 252 | }; |
251 | 253 | ||
252 | enum hsm_task_states { | 254 | enum hsm_task_states { |
@@ -727,6 +729,9 @@ extern void ata_eh_thaw_port(struct ata_port *ap); | |||
727 | extern void ata_eh_qc_complete(struct ata_queued_cmd *qc); | 729 | extern void ata_eh_qc_complete(struct ata_queued_cmd *qc); |
728 | extern void ata_eh_qc_retry(struct ata_queued_cmd *qc); | 730 | extern void ata_eh_qc_retry(struct ata_queued_cmd *qc); |
729 | 731 | ||
732 | extern void ata_do_eh(struct ata_port *ap, ata_reset_fn_t softreset, | ||
733 | ata_reset_fn_t hardreset, ata_postreset_fn_t postreset); | ||
734 | |||
730 | /* | 735 | /* |
731 | * printk helpers | 736 | * printk helpers |
732 | */ | 737 | */ |