aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <htejun@gmail.com>2006-05-15 07:58:22 -0400
committerTejun Heo <htejun@gmail.com>2006-05-15 07:58:22 -0400
commit022bdb075b9e1f224088a0b268de56268d7bc5b6 (patch)
tree05878e28202f0a86bdcc32ae5d995db9f15042da
parentf3e81b19aac23c0e8c55d5961324ef7de44c23bb (diff)
[PATCH] libata-eh: implement new EH
Implement new EH. The exported interface is ata_do_eh(), which is to be called from ->error_handler and performs the following steps to recover the failed port:
  ata_eh_autopsy(): analyze SError/TF, determine the cause of failure and the required recovery actions, and record them in ap->eh_context
  ata_eh_report(): report the failure to the user
  ata_eh_recover(): perform the recovery actions described in ap->eh_context
  ata_eh_finish(): finish failed qcs
LLDDs can customize error handling by modifying eh_context before calling ata_do_eh() or, if necessary, by doing so in between the major steps, calling each step explicitly.
Signed-off-by: Tejun Heo <htejun@gmail.com>
-rw-r--r--drivers/scsi/libata-core.c1
-rw-r--r--drivers/scsi/libata-eh.c775
-rw-r--r--include/linux/libata.h5
3 files changed, 781 insertions, 0 deletions
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 4def48ed6f46..ddc47097d37e 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -5440,3 +5440,4 @@ EXPORT_SYMBOL_GPL(ata_eh_freeze_port);
5440EXPORT_SYMBOL_GPL(ata_eh_thaw_port); 5440EXPORT_SYMBOL_GPL(ata_eh_thaw_port);
5441EXPORT_SYMBOL_GPL(ata_eh_qc_complete); 5441EXPORT_SYMBOL_GPL(ata_eh_qc_complete);
5442EXPORT_SYMBOL_GPL(ata_eh_qc_retry); 5442EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
5443EXPORT_SYMBOL_GPL(ata_do_eh);
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c
index 1968f2d140f3..cd133f83e595 100644
--- a/drivers/scsi/libata-eh.c
+++ b/drivers/scsi/libata-eh.c
@@ -626,3 +626,778 @@ void ata_eh_qc_retry(struct ata_queued_cmd *qc)
626 scmd->retries--; 626 scmd->retries--;
627 __ata_eh_qc_complete(qc); 627 __ata_eh_qc_complete(qc);
628} 628}
629
630/**
631 * ata_eh_about_to_do - about to perform eh_action
632 * @ap: target ATA port
633 * @action: action about to be performed
634 *
635 * Called just before performing EH actions to clear related bits
636 * in @ap->eh_info such that eh actions are not unnecessarily
637 * repeated.
638 *
639 * LOCKING:
640 * None.
641 */
642static void ata_eh_about_to_do(struct ata_port *ap, unsigned int action)
643{
644 unsigned long flags;
645
646 spin_lock_irqsave(&ap->host_set->lock, flags);
647 ap->eh_info.action &= ~action;
648 ap->flags |= ATA_FLAG_RECOVERED;
649 spin_unlock_irqrestore(&ap->host_set->lock, flags);
650}
651
652/**
653 * ata_err_string - convert err_mask to descriptive string
654 * @err_mask: error mask to convert to string
655 *
656 * Convert @err_mask to descriptive string. Errors are
657 * prioritized according to severity and only the most severe
658 * error is reported.
659 *
660 * LOCKING:
661 * None.
662 *
663 * RETURNS:
664 * Descriptive string for @err_mask
665 */
666static const char * ata_err_string(unsigned int err_mask)
667{
668 if (err_mask & AC_ERR_HOST_BUS)
669 return "host bus error";
670 if (err_mask & AC_ERR_ATA_BUS)
671 return "ATA bus error";
672 if (err_mask & AC_ERR_TIMEOUT)
673 return "timeout";
674 if (err_mask & AC_ERR_HSM)
675 return "HSM violation";
676 if (err_mask & AC_ERR_SYSTEM)
677 return "internal error";
678 if (err_mask & AC_ERR_MEDIA)
679 return "media error";
680 if (err_mask & AC_ERR_INVALID)
681 return "invalid argument";
682 if (err_mask & AC_ERR_DEV)
683 return "device error";
684 return "unknown error";
685}
686
687/**
688 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
689 * @dev: device to perform REQUEST_SENSE to
690 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
691 *
692 * Perform ATAPI REQUEST_SENSE after the device reported CHECK
693 * SENSE. This function is EH helper.
694 *
695 * LOCKING:
696 * Kernel thread context (may sleep).
697 *
698 * RETURNS:
699 * 0 on success, AC_ERR_* mask on failure
700 */
701static unsigned int atapi_eh_request_sense(struct ata_device *dev,
702 unsigned char *sense_buf)
703{
704 struct ata_port *ap = dev->ap;
705 struct ata_taskfile tf;
706 u8 cdb[ATAPI_CDB_LEN];
707
708 DPRINTK("ATAPI request sense\n");
709
710 ata_tf_init(dev, &tf);
711
712 /* FIXME: is this needed? */
713 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);
714
715 /* XXX: why tf_read here? */
716 ap->ops->tf_read(ap, &tf);
717
718 /* fill these in, for the case where they are -not- overwritten */
719 sense_buf[0] = 0x70;
720 sense_buf[2] = tf.feature >> 4;
721
722 memset(cdb, 0, ATAPI_CDB_LEN);
723 cdb[0] = REQUEST_SENSE;
724 cdb[4] = SCSI_SENSE_BUFFERSIZE;
725
726 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
727 tf.command = ATA_CMD_PACKET;
728
729 /* is it pointless to prefer PIO for "safety reasons"? */
730 if (ap->flags & ATA_FLAG_PIO_DMA) {
731 tf.protocol = ATA_PROT_ATAPI_DMA;
732 tf.feature |= ATAPI_PKT_DMA;
733 } else {
734 tf.protocol = ATA_PROT_ATAPI;
735 tf.lbam = (8 * 1024) & 0xff;
736 tf.lbah = (8 * 1024) >> 8;
737 }
738
739 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
740 sense_buf, SCSI_SENSE_BUFFERSIZE);
741}
742
743/**
744 * ata_eh_analyze_serror - analyze SError for a failed port
745 * @ap: ATA port to analyze SError for
746 *
747 * Analyze SError if available and further determine cause of
748 * failure.
749 *
750 * LOCKING:
751 * None.
752 */
753static void ata_eh_analyze_serror(struct ata_port *ap)
754{
755 struct ata_eh_context *ehc = &ap->eh_context;
756 u32 serror = ehc->i.serror;
757 unsigned int err_mask = 0, action = 0;
758
759 if (serror & SERR_PERSISTENT) {
760 err_mask |= AC_ERR_ATA_BUS;
761 action |= ATA_EH_HARDRESET;
762 }
763 if (serror &
764 (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
765 err_mask |= AC_ERR_ATA_BUS;
766 action |= ATA_EH_SOFTRESET;
767 }
768 if (serror & SERR_PROTOCOL) {
769 err_mask |= AC_ERR_HSM;
770 action |= ATA_EH_SOFTRESET;
771 }
772 if (serror & SERR_INTERNAL) {
773 err_mask |= AC_ERR_SYSTEM;
774 action |= ATA_EH_SOFTRESET;
775 }
776 if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG)) {
777 err_mask |= AC_ERR_ATA_BUS;
778 action |= ATA_EH_HARDRESET;
779 }
780
781 ehc->i.err_mask |= err_mask;
782 ehc->i.action |= action;
783}
784
785/**
786 * ata_eh_analyze_tf - analyze taskfile of a failed qc
787 * @qc: qc to analyze
788 * @tf: Taskfile registers to analyze
789 *
790 * Analyze taskfile of @qc and further determine cause of
791 * failure. This function also requests ATAPI sense data if
792 * avaliable.
793 *
794 * LOCKING:
795 * Kernel thread context (may sleep).
796 *
797 * RETURNS:
798 * Determined recovery action
799 */
800static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
801 const struct ata_taskfile *tf)
802{
803 unsigned int tmp, action = 0;
804 u8 stat = tf->command, err = tf->feature;
805
806 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
807 qc->err_mask |= AC_ERR_HSM;
808 return ATA_EH_SOFTRESET;
809 }
810
811 if (!(qc->err_mask & AC_ERR_DEV))
812 return 0;
813
814 switch (qc->dev->class) {
815 case ATA_DEV_ATA:
816 if (err & ATA_ICRC)
817 qc->err_mask |= AC_ERR_ATA_BUS;
818 if (err & ATA_UNC)
819 qc->err_mask |= AC_ERR_MEDIA;
820 if (err & ATA_IDNF)
821 qc->err_mask |= AC_ERR_INVALID;
822 break;
823
824 case ATA_DEV_ATAPI:
825 tmp = atapi_eh_request_sense(qc->dev,
826 qc->scsicmd->sense_buffer);
827 if (!tmp) {
828 /* ATA_QCFLAG_SENSE_VALID is used to tell
829 * atapi_qc_complete() that sense data is
830 * already valid.
831 *
832 * TODO: interpret sense data and set
833 * appropriate err_mask.
834 */
835 qc->flags |= ATA_QCFLAG_SENSE_VALID;
836 } else
837 qc->err_mask |= tmp;
838 }
839
840 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
841 action |= ATA_EH_SOFTRESET;
842
843 return action;
844}
845
846static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent)
847{
848 if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT))
849 return 1;
850
851 if (ent->is_io) {
852 if (ent->err_mask & AC_ERR_HSM)
853 return 1;
854 if ((ent->err_mask &
855 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
856 return 2;
857 }
858
859 return 0;
860}
861
862struct speed_down_needed_arg {
863 u64 since;
864 int nr_errors[3];
865};
866
867static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg)
868{
869 struct speed_down_needed_arg *arg = void_arg;
870
871 if (ent->timestamp < arg->since)
872 return -1;
873
874 arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++;
875 return 0;
876}
877
878/**
879 * ata_eh_speed_down_needed - Determine wheter speed down is necessary
880 * @dev: Device of interest
881 *
882 * This function examines error ring of @dev and determines
883 * whether speed down is necessary. Speed down is necessary if
884 * there have been more than 3 of Cat-1 errors or 10 of Cat-2
885 * errors during last 15 minutes.
886 *
887 * Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM
888 * violation for known supported commands.
889 *
890 * Cat-2 errors are unclassified DEV error for known supported
891 * command.
892 *
893 * LOCKING:
894 * Inherited from caller.
895 *
896 * RETURNS:
897 * 1 if speed down is necessary, 0 otherwise
898 */
899static int ata_eh_speed_down_needed(struct ata_device *dev)
900{
901 const u64 interval = 15LLU * 60 * HZ;
902 static const int err_limits[3] = { -1, 3, 10 };
903 struct speed_down_needed_arg arg;
904 struct ata_ering_entry *ent;
905 int err_cat;
906 u64 j64;
907
908 ent = ata_ering_top(&dev->ering);
909 if (!ent)
910 return 0;
911
912 err_cat = ata_eh_categorize_ering_entry(ent);
913 if (err_cat == 0)
914 return 0;
915
916 memset(&arg, 0, sizeof(arg));
917
918 j64 = get_jiffies_64();
919 if (j64 >= interval)
920 arg.since = j64 - interval;
921 else
922 arg.since = 0;
923
924 ata_ering_map(&dev->ering, speed_down_needed_cb, &arg);
925
926 return arg.nr_errors[err_cat] > err_limits[err_cat];
927}
928
929/**
930 * ata_eh_speed_down - record error and speed down if necessary
931 * @dev: Failed device
932 * @is_io: Did the device fail during normal IO?
933 * @err_mask: err_mask of the error
934 *
935 * Record error and examine error history to determine whether
936 * adjusting transmission speed is necessary. It also sets
937 * transmission limits appropriately if such adjustment is
938 * necessary.
939 *
940 * LOCKING:
941 * Kernel thread context (may sleep).
942 *
943 * RETURNS:
944 * 0 on success, -errno otherwise
945 */
946static int ata_eh_speed_down(struct ata_device *dev, int is_io,
947 unsigned int err_mask)
948{
949 if (!err_mask)
950 return 0;
951
952 /* record error and determine whether speed down is necessary */
953 ata_ering_record(&dev->ering, is_io, err_mask);
954
955 if (!ata_eh_speed_down_needed(dev))
956 return 0;
957
958 /* speed down SATA link speed if possible */
959 if (sata_down_spd_limit(dev->ap) == 0)
960 return ATA_EH_HARDRESET;
961
962 /* lower transfer mode */
963 if (ata_down_xfermask_limit(dev, 0) == 0)
964 return ATA_EH_SOFTRESET;
965
966 ata_dev_printk(dev, KERN_ERR,
967 "speed down requested but no transfer mode left\n");
968 return 0;
969}
970
971/**
972 * ata_eh_autopsy - analyze error and determine recovery action
973 * @ap: ATA port to perform autopsy on
974 *
975 * Analyze why @ap failed and determine which recovery action is
976 * needed. This function also sets more detailed AC_ERR_* values
977 * and fills sense data for ATAPI CHECK SENSE.
978 *
979 * LOCKING:
980 * Kernel thread context (may sleep).
981 */
982static void ata_eh_autopsy(struct ata_port *ap)
983{
984 struct ata_eh_context *ehc = &ap->eh_context;
985 unsigned int action = ehc->i.action;
986 struct ata_device *failed_dev = NULL;
987 unsigned int all_err_mask = 0;
988 int tag, is_io = 0;
989 u32 serror;
990 int rc;
991
992 DPRINTK("ENTER\n");
993
994 /* obtain and analyze SError */
995 rc = sata_scr_read(ap, SCR_ERROR, &serror);
996 if (rc == 0) {
997 ehc->i.serror |= serror;
998 ata_eh_analyze_serror(ap);
999 } else if (rc != -EOPNOTSUPP)
1000 action |= ATA_EH_HARDRESET;
1001
1002 /* any real error trumps AC_ERR_OTHER */
1003 if (ehc->i.err_mask & ~AC_ERR_OTHER)
1004 ehc->i.err_mask &= ~AC_ERR_OTHER;
1005
1006 all_err_mask |= ehc->i.err_mask;
1007
1008 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1009 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
1010
1011 if (!(qc->flags & ATA_QCFLAG_FAILED))
1012 continue;
1013
1014 /* inherit upper level err_mask */
1015 qc->err_mask |= ehc->i.err_mask;
1016
1017 if (qc->err_mask & AC_ERR_TIMEOUT)
1018 action |= ATA_EH_SOFTRESET;
1019
1020 /* analyze TF */
1021 action |= ata_eh_analyze_tf(qc, &qc->result_tf);
1022
1023 /* DEV errors are probably spurious in case of ATA_BUS error */
1024 if (qc->err_mask & AC_ERR_ATA_BUS)
1025 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
1026 AC_ERR_INVALID);
1027
1028 /* any real error trumps unknown error */
1029 if (qc->err_mask & ~AC_ERR_OTHER)
1030 qc->err_mask &= ~AC_ERR_OTHER;
1031
1032 /* SENSE_VALID trumps dev/unknown error and revalidation */
1033 if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
1034 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
1035 action &= ~ATA_EH_REVALIDATE;
1036 }
1037
1038 /* accumulate error info */
1039 failed_dev = qc->dev;
1040 all_err_mask |= qc->err_mask;
1041 if (qc->flags & ATA_QCFLAG_IO)
1042 is_io = 1;
1043 }
1044
1045 /* speed down iff command was in progress */
1046 if (failed_dev)
1047 action |= ata_eh_speed_down(failed_dev, is_io, all_err_mask);
1048
1049 if (all_err_mask)
1050 action |= ATA_EH_REVALIDATE;
1051
1052 ehc->i.dev = failed_dev;
1053 ehc->i.action = action;
1054
1055 DPRINTK("EXIT\n");
1056}
1057
1058/**
1059 * ata_eh_report - report error handling to user
1060 * @ap: ATA port EH is going on
1061 *
1062 * Report EH to user.
1063 *
1064 * LOCKING:
1065 * None.
1066 */
1067static void ata_eh_report(struct ata_port *ap)
1068{
1069 struct ata_eh_context *ehc = &ap->eh_context;
1070 const char *frozen, *desc;
1071 int tag, nr_failed = 0;
1072
1073 desc = NULL;
1074 if (ehc->i.desc[0] != '\0')
1075 desc = ehc->i.desc;
1076
1077 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1078 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
1079
1080 if (!(qc->flags & ATA_QCFLAG_FAILED))
1081 continue;
1082 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
1083 continue;
1084
1085 nr_failed++;
1086 }
1087
1088 if (!nr_failed && !ehc->i.err_mask)
1089 return;
1090
1091 frozen = "";
1092 if (ap->flags & ATA_FLAG_FROZEN)
1093 frozen = " frozen";
1094
1095 if (ehc->i.dev) {
1096 ata_dev_printk(ehc->i.dev, KERN_ERR,
1097 "exception Emask 0x%x SErr 0x%x action 0x%x%s\n",
1098 ehc->i.err_mask, ehc->i.serror, ehc->i.action,
1099 frozen);
1100 if (desc)
1101 ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc);
1102 } else {
1103 ata_port_printk(ap, KERN_ERR,
1104 "exception Emask 0x%x SErr 0x%x action 0x%x%s\n",
1105 ehc->i.err_mask, ehc->i.serror, ehc->i.action,
1106 frozen);
1107 if (desc)
1108 ata_port_printk(ap, KERN_ERR, "(%s)\n", desc);
1109 }
1110
1111 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1112 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
1113
1114 if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask)
1115 continue;
1116
1117 ata_dev_printk(qc->dev, KERN_ERR, "tag %d cmd 0x%x "
1118 "Emask 0x%x stat 0x%x err 0x%x (%s)\n",
1119 qc->tag, qc->tf.command, qc->err_mask,
1120 qc->result_tf.command, qc->result_tf.feature,
1121 ata_err_string(qc->err_mask));
1122 }
1123}
1124
1125static int ata_eh_reset(struct ata_port *ap, ata_reset_fn_t softreset,
1126 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
1127{
1128 struct ata_eh_context *ehc = &ap->eh_context;
1129 unsigned int classes[ATA_MAX_DEVICES];
1130 int tries = ATA_EH_RESET_TRIES;
1131 ata_reset_fn_t reset;
1132 int rc;
1133
1134 if (softreset && (!hardreset || (!sata_set_spd_needed(ap) &&
1135 !(ehc->i.action & ATA_EH_HARDRESET))))
1136 reset = softreset;
1137 else
1138 reset = hardreset;
1139
1140 retry:
1141 ata_port_printk(ap, KERN_INFO, "%s resetting port\n",
1142 reset == softreset ? "soft" : "hard");
1143
1144 /* reset */
1145 ata_eh_about_to_do(ap, ATA_EH_RESET_MASK);
1146 ehc->i.flags |= ATA_EHI_DID_RESET;
1147
1148 rc = ata_do_reset(ap, reset, classes);
1149
1150 if (rc && --tries) {
1151 ata_port_printk(ap, KERN_WARNING,
1152 "%sreset failed, retrying in 5 secs\n",
1153 reset == softreset ? "soft" : "hard");
1154 ssleep(5);
1155
1156 if (reset == hardreset)
1157 sata_down_spd_limit(ap);
1158 if (hardreset)
1159 reset = hardreset;
1160 goto retry;
1161 }
1162
1163 if (rc == 0) {
1164 if (postreset)
1165 postreset(ap, classes);
1166
1167 /* reset successful, schedule revalidation */
1168 ehc->i.dev = NULL;
1169 ehc->i.action &= ~ATA_EH_RESET_MASK;
1170 ehc->i.action |= ATA_EH_REVALIDATE;
1171 }
1172
1173 return rc;
1174}
1175
1176static int ata_eh_revalidate(struct ata_port *ap,
1177 struct ata_device **r_failed_dev)
1178{
1179 struct ata_eh_context *ehc = &ap->eh_context;
1180 struct ata_device *dev;
1181 int i, rc = 0;
1182
1183 DPRINTK("ENTER\n");
1184
1185 for (i = 0; i < ATA_MAX_DEVICES; i++) {
1186 dev = &ap->device[i];
1187
1188 if (ehc->i.action & ATA_EH_REVALIDATE && ata_dev_enabled(dev) &&
1189 (!ehc->i.dev || ehc->i.dev == dev)) {
1190 if (ata_port_offline(ap)) {
1191 rc = -EIO;
1192 break;
1193 }
1194
1195 ata_eh_about_to_do(ap, ATA_EH_REVALIDATE);
1196 rc = ata_dev_revalidate(dev,
1197 ehc->i.flags & ATA_EHI_DID_RESET);
1198 if (rc)
1199 break;
1200
1201 ehc->i.action &= ~ATA_EH_REVALIDATE;
1202 }
1203 }
1204
1205 if (rc)
1206 *r_failed_dev = dev;
1207
1208 DPRINTK("EXIT\n");
1209 return rc;
1210}
1211
1212static int ata_port_nr_enabled(struct ata_port *ap)
1213{
1214 int i, cnt = 0;
1215
1216 for (i = 0; i < ATA_MAX_DEVICES; i++)
1217 if (ata_dev_enabled(&ap->device[i]))
1218 cnt++;
1219 return cnt;
1220}
1221
1222/**
1223 * ata_eh_recover - recover host port after error
1224 * @ap: host port to recover
1225 * @softreset: softreset method (can be NULL)
1226 * @hardreset: hardreset method (can be NULL)
1227 * @postreset: postreset method (can be NULL)
1228 *
1229 * This is the alpha and omega, eum and yang, heart and soul of
1230 * libata exception handling. On entry, actions required to
1231 * recover each devices are recorded in eh_context. This
1232 * function executes all the operations with appropriate retrials
1233 * and fallbacks to resurrect failed devices.
1234 *
1235 * LOCKING:
1236 * Kernel thread context (may sleep).
1237 *
1238 * RETURNS:
1239 * 0 on success, -errno on failure.
1240 */
1241static int ata_eh_recover(struct ata_port *ap, ata_reset_fn_t softreset,
1242 ata_reset_fn_t hardreset,
1243 ata_postreset_fn_t postreset)
1244{
1245 struct ata_eh_context *ehc = &ap->eh_context;
1246 struct ata_device *dev;
1247 int down_xfermask, i, rc;
1248
1249 DPRINTK("ENTER\n");
1250
1251 /* prep for recovery */
1252 for (i = 0; i < ATA_MAX_DEVICES; i++) {
1253 dev = &ap->device[i];
1254
1255 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
1256 }
1257
1258 retry:
1259 down_xfermask = 0;
1260 rc = 0;
1261
1262 /* skip EH if possible. */
1263 if (!ata_port_nr_enabled(ap) && !(ap->flags & ATA_FLAG_FROZEN))
1264 ehc->i.action = 0;
1265
1266 /* reset */
1267 if (ehc->i.action & ATA_EH_RESET_MASK) {
1268 ata_eh_freeze_port(ap);
1269
1270 rc = ata_eh_reset(ap, softreset, hardreset, postreset);
1271 if (rc) {
1272 ata_port_printk(ap, KERN_ERR,
1273 "reset failed, giving up\n");
1274 goto out;
1275 }
1276
1277 ata_eh_thaw_port(ap);
1278 }
1279
1280 /* revalidate existing devices */
1281 rc = ata_eh_revalidate(ap, &dev);
1282 if (rc)
1283 goto dev_fail;
1284
1285 /* configure transfer mode if the port has been reset */
1286 if (ehc->i.flags & ATA_EHI_DID_RESET) {
1287 rc = ata_set_mode(ap, &dev);
1288 if (rc) {
1289 down_xfermask = 1;
1290 goto dev_fail;
1291 }
1292 }
1293
1294 goto out;
1295
1296 dev_fail:
1297 switch (rc) {
1298 case -ENODEV:
1299 case -EINVAL:
1300 ehc->tries[dev->devno] = 0;
1301 break;
1302 case -EIO:
1303 sata_down_spd_limit(ap);
1304 default:
1305 ehc->tries[dev->devno]--;
1306 if (down_xfermask &&
1307 ata_down_xfermask_limit(dev, ehc->tries[dev->devno] == 1))
1308 ehc->tries[dev->devno] = 0;
1309 }
1310
1311 /* disable device if it has used up all its chances */
1312 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno])
1313 ata_dev_disable(dev);
1314
1315 /* soft didn't work? be haaaaard */
1316 if (ehc->i.flags & ATA_EHI_DID_RESET)
1317 ehc->i.action |= ATA_EH_HARDRESET;
1318 else
1319 ehc->i.action |= ATA_EH_SOFTRESET;
1320
1321 if (ata_port_nr_enabled(ap)) {
1322 ata_port_printk(ap, KERN_WARNING, "failed to recover some "
1323 "devices, retrying in 5 secs\n");
1324 ssleep(5);
1325 } else {
1326 /* no device left, repeat fast */
1327 msleep(500);
1328 }
1329
1330 goto retry;
1331
1332 out:
1333 if (rc) {
1334 for (i = 0; i < ATA_MAX_DEVICES; i++)
1335 ata_dev_disable(&ap->device[i]);
1336 }
1337
1338 DPRINTK("EXIT, rc=%d\n", rc);
1339 return rc;
1340}
1341
1342/**
1343 * ata_eh_finish - finish up EH
1344 * @ap: host port to finish EH for
1345 *
1346 * Recovery is complete. Clean up EH states and retry or finish
1347 * failed qcs.
1348 *
1349 * LOCKING:
1350 * None.
1351 */
1352static void ata_eh_finish(struct ata_port *ap)
1353{
1354 int tag;
1355
1356 /* retry or finish qcs */
1357 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1358 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
1359
1360 if (!(qc->flags & ATA_QCFLAG_FAILED))
1361 continue;
1362
1363 if (qc->err_mask) {
1364 /* FIXME: Once EH migration is complete,
1365 * generate sense data in this function,
1366 * considering both err_mask and tf.
1367 */
1368 if (qc->err_mask & AC_ERR_INVALID)
1369 ata_eh_qc_complete(qc);
1370 else
1371 ata_eh_qc_retry(qc);
1372 } else {
1373 if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
1374 ata_eh_qc_complete(qc);
1375 } else {
1376 /* feed zero TF to sense generation */
1377 memset(&qc->result_tf, 0, sizeof(qc->result_tf));
1378 ata_eh_qc_retry(qc);
1379 }
1380 }
1381 }
1382}
1383
1384/**
1385 * ata_do_eh - do standard error handling
1386 * @ap: host port to handle error for
1387 * @softreset: softreset method (can be NULL)
1388 * @hardreset: hardreset method (can be NULL)
1389 * @postreset: postreset method (can be NULL)
1390 *
1391 * Perform standard error handling sequence.
1392 *
1393 * LOCKING:
1394 * Kernel thread context (may sleep).
1395 */
1396void ata_do_eh(struct ata_port *ap, ata_reset_fn_t softreset,
1397 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
1398{
1399 ata_eh_autopsy(ap);
1400 ata_eh_report(ap);
1401 ata_eh_recover(ap, softreset, hardreset, postreset);
1402 ata_eh_finish(ap);
1403}
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 298f9918e375..9fe46073cf8c 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -247,6 +247,8 @@ enum {
247 247
248 /* how hard are we gonna try to probe/recover devices */ 248 /* how hard are we gonna try to probe/recover devices */
249 ATA_PROBE_MAX_TRIES = 3, 249 ATA_PROBE_MAX_TRIES = 3,
250 ATA_EH_RESET_TRIES = 3,
251 ATA_EH_DEV_TRIES = 3,
250}; 252};
251 253
252enum hsm_task_states { 254enum hsm_task_states {
@@ -727,6 +729,9 @@ extern void ata_eh_thaw_port(struct ata_port *ap);
727extern void ata_eh_qc_complete(struct ata_queued_cmd *qc); 729extern void ata_eh_qc_complete(struct ata_queued_cmd *qc);
728extern void ata_eh_qc_retry(struct ata_queued_cmd *qc); 730extern void ata_eh_qc_retry(struct ata_queued_cmd *qc);
729 731
732extern void ata_do_eh(struct ata_port *ap, ata_reset_fn_t softreset,
733 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset);
734
730/* 735/*
731 * printk helpers 736 * printk helpers
732 */ 737 */