diff options
| author | Tejun Heo <htejun@gmail.com> | 2006-05-15 07:58:22 -0400 |
|---|---|---|
| committer | Tejun Heo <htejun@gmail.com> | 2006-05-15 07:58:22 -0400 |
| commit | 022bdb075b9e1f224088a0b268de56268d7bc5b6 (patch) | |
| tree | 05878e28202f0a86bdcc32ae5d995db9f15042da | |
| parent | f3e81b19aac23c0e8c55d5961324ef7de44c23bb (diff) | |
[PATCH] libata-eh: implement new EH
Implement new EH. The exported interface is ata_do_eh() which is to
be called from ->error_handler and performs the following steps to
recover the failed port.
ata_eh_autopsy() : analyze SError/TF, determine the cause of failure
and required recovery actions and record it in
ap->eh_context
ata_eh_report() : report the failure to user
ata_eh_recover() : perform recovery actions described in ap->eh_context
ata_eh_finish() : finish failed qcs
LLDDs can customize error handling by modifying eh_context before
calling ata_do_eh() or, if necessary, in between the major steps by
calling each step explicitly.
Signed-off-by: Tejun Heo <htejun@gmail.com>
| -rw-r--r-- | drivers/scsi/libata-core.c | 1 | ||||
| -rw-r--r-- | drivers/scsi/libata-eh.c | 775 | ||||
| -rw-r--r-- | include/linux/libata.h | 5 |
3 files changed, 781 insertions, 0 deletions
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 4def48ed6f46..ddc47097d37e 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c | |||
| @@ -5440,3 +5440,4 @@ EXPORT_SYMBOL_GPL(ata_eh_freeze_port); | |||
| 5440 | EXPORT_SYMBOL_GPL(ata_eh_thaw_port); | 5440 | EXPORT_SYMBOL_GPL(ata_eh_thaw_port); |
| 5441 | EXPORT_SYMBOL_GPL(ata_eh_qc_complete); | 5441 | EXPORT_SYMBOL_GPL(ata_eh_qc_complete); |
| 5442 | EXPORT_SYMBOL_GPL(ata_eh_qc_retry); | 5442 | EXPORT_SYMBOL_GPL(ata_eh_qc_retry); |
| 5443 | EXPORT_SYMBOL_GPL(ata_do_eh); | ||
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c index 1968f2d140f3..cd133f83e595 100644 --- a/drivers/scsi/libata-eh.c +++ b/drivers/scsi/libata-eh.c | |||
| @@ -626,3 +626,778 @@ void ata_eh_qc_retry(struct ata_queued_cmd *qc) | |||
| 626 | scmd->retries--; | 626 | scmd->retries--; |
| 627 | __ata_eh_qc_complete(qc); | 627 | __ata_eh_qc_complete(qc); |
| 628 | } | 628 | } |
| 629 | |||
| 630 | /** | ||
| 631 | * ata_eh_about_to_do - about to perform eh_action | ||
| 632 | * @ap: target ATA port | ||
| 633 | * @action: action about to be performed | ||
| 634 | * | ||
| 635 | * Called just before performing EH actions to clear related bits | ||
| 636 | * in @ap->eh_info such that eh actions are not unnecessarily | ||
| 637 | * repeated. | ||
| 638 | * | ||
| 639 | * LOCKING: | ||
| 640 | * None. | ||
| 641 | */ | ||
| 642 | static void ata_eh_about_to_do(struct ata_port *ap, unsigned int action) | ||
| 643 | { | ||
| 644 | unsigned long flags; | ||
| 645 | |||
| 646 | spin_lock_irqsave(&ap->host_set->lock, flags); | ||
| 647 | ap->eh_info.action &= ~action; | ||
| 648 | ap->flags |= ATA_FLAG_RECOVERED; | ||
| 649 | spin_unlock_irqrestore(&ap->host_set->lock, flags); | ||
| 650 | } | ||
| 651 | |||
| 652 | /** | ||
| 653 | * ata_err_string - convert err_mask to descriptive string | ||
| 654 | * @err_mask: error mask to convert to string | ||
| 655 | * | ||
| 656 | * Convert @err_mask to descriptive string. Errors are | ||
| 657 | * prioritized according to severity and only the most severe | ||
| 658 | * error is reported. | ||
| 659 | * | ||
| 660 | * LOCKING: | ||
| 661 | * None. | ||
| 662 | * | ||
| 663 | * RETURNS: | ||
| 664 | * Descriptive string for @err_mask | ||
| 665 | */ | ||
| 666 | static const char * ata_err_string(unsigned int err_mask) | ||
| 667 | { | ||
| 668 | if (err_mask & AC_ERR_HOST_BUS) | ||
| 669 | return "host bus error"; | ||
| 670 | if (err_mask & AC_ERR_ATA_BUS) | ||
| 671 | return "ATA bus error"; | ||
| 672 | if (err_mask & AC_ERR_TIMEOUT) | ||
| 673 | return "timeout"; | ||
| 674 | if (err_mask & AC_ERR_HSM) | ||
| 675 | return "HSM violation"; | ||
| 676 | if (err_mask & AC_ERR_SYSTEM) | ||
| 677 | return "internal error"; | ||
| 678 | if (err_mask & AC_ERR_MEDIA) | ||
| 679 | return "media error"; | ||
| 680 | if (err_mask & AC_ERR_INVALID) | ||
| 681 | return "invalid argument"; | ||
| 682 | if (err_mask & AC_ERR_DEV) | ||
| 683 | return "device error"; | ||
| 684 | return "unknown error"; | ||
| 685 | } | ||
| 686 | |||
| 687 | /** | ||
| 688 | * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE | ||
| 689 | * @dev: device to perform REQUEST_SENSE to | ||
| 690 | * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) | ||
| 691 | * | ||
| 692 | * Perform ATAPI REQUEST_SENSE after the device reported CHECK | ||
| 693 | * SENSE. This function is EH helper. | ||
| 694 | * | ||
| 695 | * LOCKING: | ||
| 696 | * Kernel thread context (may sleep). | ||
| 697 | * | ||
| 698 | * RETURNS: | ||
| 699 | * 0 on success, AC_ERR_* mask on failure | ||
| 700 | */ | ||
| 701 | static unsigned int atapi_eh_request_sense(struct ata_device *dev, | ||
| 702 | unsigned char *sense_buf) | ||
| 703 | { | ||
| 704 | struct ata_port *ap = dev->ap; | ||
| 705 | struct ata_taskfile tf; | ||
| 706 | u8 cdb[ATAPI_CDB_LEN]; | ||
| 707 | |||
| 708 | DPRINTK("ATAPI request sense\n"); | ||
| 709 | |||
| 710 | ata_tf_init(dev, &tf); | ||
| 711 | |||
| 712 | /* FIXME: is this needed? */ | ||
| 713 | memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); | ||
| 714 | |||
| 715 | /* XXX: why tf_read here? */ | ||
| 716 | ap->ops->tf_read(ap, &tf); | ||
| 717 | |||
| 718 | /* fill these in, for the case where they are -not- overwritten */ | ||
| 719 | sense_buf[0] = 0x70; | ||
| 720 | sense_buf[2] = tf.feature >> 4; | ||
| 721 | |||
| 722 | memset(cdb, 0, ATAPI_CDB_LEN); | ||
| 723 | cdb[0] = REQUEST_SENSE; | ||
| 724 | cdb[4] = SCSI_SENSE_BUFFERSIZE; | ||
| 725 | |||
| 726 | tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; | ||
| 727 | tf.command = ATA_CMD_PACKET; | ||
| 728 | |||
| 729 | /* is it pointless to prefer PIO for "safety reasons"? */ | ||
| 730 | if (ap->flags & ATA_FLAG_PIO_DMA) { | ||
| 731 | tf.protocol = ATA_PROT_ATAPI_DMA; | ||
| 732 | tf.feature |= ATAPI_PKT_DMA; | ||
| 733 | } else { | ||
| 734 | tf.protocol = ATA_PROT_ATAPI; | ||
| 735 | tf.lbam = (8 * 1024) & 0xff; | ||
| 736 | tf.lbah = (8 * 1024) >> 8; | ||
| 737 | } | ||
| 738 | |||
| 739 | return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, | ||
| 740 | sense_buf, SCSI_SENSE_BUFFERSIZE); | ||
| 741 | } | ||
| 742 | |||
| 743 | /** | ||
| 744 | * ata_eh_analyze_serror - analyze SError for a failed port | ||
| 745 | * @ap: ATA port to analyze SError for | ||
| 746 | * | ||
| 747 | * Analyze SError if available and further determine cause of | ||
| 748 | * failure. | ||
| 749 | * | ||
| 750 | * LOCKING: | ||
| 751 | * None. | ||
| 752 | */ | ||
| 753 | static void ata_eh_analyze_serror(struct ata_port *ap) | ||
| 754 | { | ||
| 755 | struct ata_eh_context *ehc = &ap->eh_context; | ||
| 756 | u32 serror = ehc->i.serror; | ||
| 757 | unsigned int err_mask = 0, action = 0; | ||
| 758 | |||
| 759 | if (serror & SERR_PERSISTENT) { | ||
| 760 | err_mask |= AC_ERR_ATA_BUS; | ||
| 761 | action |= ATA_EH_HARDRESET; | ||
| 762 | } | ||
| 763 | if (serror & | ||
| 764 | (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) { | ||
| 765 | err_mask |= AC_ERR_ATA_BUS; | ||
| 766 | action |= ATA_EH_SOFTRESET; | ||
| 767 | } | ||
| 768 | if (serror & SERR_PROTOCOL) { | ||
| 769 | err_mask |= AC_ERR_HSM; | ||
| 770 | action |= ATA_EH_SOFTRESET; | ||
| 771 | } | ||
| 772 | if (serror & SERR_INTERNAL) { | ||
| 773 | err_mask |= AC_ERR_SYSTEM; | ||
| 774 | action |= ATA_EH_SOFTRESET; | ||
| 775 | } | ||
| 776 | if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG)) { | ||
| 777 | err_mask |= AC_ERR_ATA_BUS; | ||
| 778 | action |= ATA_EH_HARDRESET; | ||
| 779 | } | ||
| 780 | |||
| 781 | ehc->i.err_mask |= err_mask; | ||
| 782 | ehc->i.action |= action; | ||
| 783 | } | ||
| 784 | |||
| 785 | /** | ||
| 786 | * ata_eh_analyze_tf - analyze taskfile of a failed qc | ||
| 787 | * @qc: qc to analyze | ||
| 788 | * @tf: Taskfile registers to analyze | ||
| 789 | * | ||
| 790 | * Analyze taskfile of @qc and further determine cause of | ||
| 791 | * failure. This function also requests ATAPI sense data if | ||
| 792 | * avaliable. | ||
| 793 | * | ||
| 794 | * LOCKING: | ||
| 795 | * Kernel thread context (may sleep). | ||
| 796 | * | ||
| 797 | * RETURNS: | ||
| 798 | * Determined recovery action | ||
| 799 | */ | ||
| 800 | static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, | ||
| 801 | const struct ata_taskfile *tf) | ||
| 802 | { | ||
| 803 | unsigned int tmp, action = 0; | ||
| 804 | u8 stat = tf->command, err = tf->feature; | ||
| 805 | |||
| 806 | if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { | ||
| 807 | qc->err_mask |= AC_ERR_HSM; | ||
| 808 | return ATA_EH_SOFTRESET; | ||
| 809 | } | ||
| 810 | |||
| 811 | if (!(qc->err_mask & AC_ERR_DEV)) | ||
| 812 | return 0; | ||
| 813 | |||
| 814 | switch (qc->dev->class) { | ||
| 815 | case ATA_DEV_ATA: | ||
| 816 | if (err & ATA_ICRC) | ||
| 817 | qc->err_mask |= AC_ERR_ATA_BUS; | ||
| 818 | if (err & ATA_UNC) | ||
| 819 | qc->err_mask |= AC_ERR_MEDIA; | ||
| 820 | if (err & ATA_IDNF) | ||
| 821 | qc->err_mask |= AC_ERR_INVALID; | ||
| 822 | break; | ||
| 823 | |||
| 824 | case ATA_DEV_ATAPI: | ||
| 825 | tmp = atapi_eh_request_sense(qc->dev, | ||
| 826 | qc->scsicmd->sense_buffer); | ||
| 827 | if (!tmp) { | ||
| 828 | /* ATA_QCFLAG_SENSE_VALID is used to tell | ||
| 829 | * atapi_qc_complete() that sense data is | ||
| 830 | * already valid. | ||
| 831 | * | ||
| 832 | * TODO: interpret sense data and set | ||
| 833 | * appropriate err_mask. | ||
| 834 | */ | ||
| 835 | qc->flags |= ATA_QCFLAG_SENSE_VALID; | ||
| 836 | } else | ||
| 837 | qc->err_mask |= tmp; | ||
| 838 | } | ||
| 839 | |||
| 840 | if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) | ||
| 841 | action |= ATA_EH_SOFTRESET; | ||
| 842 | |||
| 843 | return action; | ||
| 844 | } | ||
| 845 | |||
| 846 | static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent) | ||
| 847 | { | ||
| 848 | if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT)) | ||
| 849 | return 1; | ||
| 850 | |||
| 851 | if (ent->is_io) { | ||
| 852 | if (ent->err_mask & AC_ERR_HSM) | ||
| 853 | return 1; | ||
| 854 | if ((ent->err_mask & | ||
| 855 | (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) | ||
| 856 | return 2; | ||
| 857 | } | ||
| 858 | |||
| 859 | return 0; | ||
| 860 | } | ||
| 861 | |||
| 862 | struct speed_down_needed_arg { | ||
| 863 | u64 since; | ||
| 864 | int nr_errors[3]; | ||
| 865 | }; | ||
| 866 | |||
| 867 | static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg) | ||
| 868 | { | ||
| 869 | struct speed_down_needed_arg *arg = void_arg; | ||
| 870 | |||
| 871 | if (ent->timestamp < arg->since) | ||
| 872 | return -1; | ||
| 873 | |||
| 874 | arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++; | ||
| 875 | return 0; | ||
| 876 | } | ||
| 877 | |||
| 878 | /** | ||
| 879 | * ata_eh_speed_down_needed - Determine wheter speed down is necessary | ||
| 880 | * @dev: Device of interest | ||
| 881 | * | ||
| 882 | * This function examines error ring of @dev and determines | ||
| 883 | * whether speed down is necessary. Speed down is necessary if | ||
| 884 | * there have been more than 3 of Cat-1 errors or 10 of Cat-2 | ||
| 885 | * errors during last 15 minutes. | ||
| 886 | * | ||
| 887 | * Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM | ||
| 888 | * violation for known supported commands. | ||
| 889 | * | ||
| 890 | * Cat-2 errors are unclassified DEV error for known supported | ||
| 891 | * command. | ||
| 892 | * | ||
| 893 | * LOCKING: | ||
| 894 | * Inherited from caller. | ||
| 895 | * | ||
| 896 | * RETURNS: | ||
| 897 | * 1 if speed down is necessary, 0 otherwise | ||
| 898 | */ | ||
| 899 | static int ata_eh_speed_down_needed(struct ata_device *dev) | ||
| 900 | { | ||
| 901 | const u64 interval = 15LLU * 60 * HZ; | ||
| 902 | static const int err_limits[3] = { -1, 3, 10 }; | ||
| 903 | struct speed_down_needed_arg arg; | ||
| 904 | struct ata_ering_entry *ent; | ||
| 905 | int err_cat; | ||
| 906 | u64 j64; | ||
| 907 | |||
| 908 | ent = ata_ering_top(&dev->ering); | ||
| 909 | if (!ent) | ||
| 910 | return 0; | ||
| 911 | |||
| 912 | err_cat = ata_eh_categorize_ering_entry(ent); | ||
| 913 | if (err_cat == 0) | ||
| 914 | return 0; | ||
| 915 | |||
| 916 | memset(&arg, 0, sizeof(arg)); | ||
| 917 | |||
| 918 | j64 = get_jiffies_64(); | ||
| 919 | if (j64 >= interval) | ||
| 920 | arg.since = j64 - interval; | ||
| 921 | else | ||
| 922 | arg.since = 0; | ||
| 923 | |||
| 924 | ata_ering_map(&dev->ering, speed_down_needed_cb, &arg); | ||
| 925 | |||
| 926 | return arg.nr_errors[err_cat] > err_limits[err_cat]; | ||
| 927 | } | ||
| 928 | |||
| 929 | /** | ||
| 930 | * ata_eh_speed_down - record error and speed down if necessary | ||
| 931 | * @dev: Failed device | ||
| 932 | * @is_io: Did the device fail during normal IO? | ||
| 933 | * @err_mask: err_mask of the error | ||
| 934 | * | ||
| 935 | * Record error and examine error history to determine whether | ||
| 936 | * adjusting transmission speed is necessary. It also sets | ||
| 937 | * transmission limits appropriately if such adjustment is | ||
| 938 | * necessary. | ||
| 939 | * | ||
| 940 | * LOCKING: | ||
| 941 | * Kernel thread context (may sleep). | ||
| 942 | * | ||
| 943 | * RETURNS: | ||
| 944 | * 0 on success, -errno otherwise | ||
| 945 | */ | ||
| 946 | static int ata_eh_speed_down(struct ata_device *dev, int is_io, | ||
| 947 | unsigned int err_mask) | ||
| 948 | { | ||
| 949 | if (!err_mask) | ||
| 950 | return 0; | ||
| 951 | |||
| 952 | /* record error and determine whether speed down is necessary */ | ||
| 953 | ata_ering_record(&dev->ering, is_io, err_mask); | ||
| 954 | |||
| 955 | if (!ata_eh_speed_down_needed(dev)) | ||
| 956 | return 0; | ||
| 957 | |||
| 958 | /* speed down SATA link speed if possible */ | ||
| 959 | if (sata_down_spd_limit(dev->ap) == 0) | ||
| 960 | return ATA_EH_HARDRESET; | ||
| 961 | |||
| 962 | /* lower transfer mode */ | ||
| 963 | if (ata_down_xfermask_limit(dev, 0) == 0) | ||
| 964 | return ATA_EH_SOFTRESET; | ||
| 965 | |||
| 966 | ata_dev_printk(dev, KERN_ERR, | ||
| 967 | "speed down requested but no transfer mode left\n"); | ||
| 968 | return 0; | ||
| 969 | } | ||
| 970 | |||
| 971 | /** | ||
| 972 | * ata_eh_autopsy - analyze error and determine recovery action | ||
| 973 | * @ap: ATA port to perform autopsy on | ||
| 974 | * | ||
| 975 | * Analyze why @ap failed and determine which recovery action is | ||
| 976 | * needed. This function also sets more detailed AC_ERR_* values | ||
| 977 | * and fills sense data for ATAPI CHECK SENSE. | ||
| 978 | * | ||
| 979 | * LOCKING: | ||
| 980 | * Kernel thread context (may sleep). | ||
| 981 | */ | ||
| 982 | static void ata_eh_autopsy(struct ata_port *ap) | ||
| 983 | { | ||
| 984 | struct ata_eh_context *ehc = &ap->eh_context; | ||
| 985 | unsigned int action = ehc->i.action; | ||
| 986 | struct ata_device *failed_dev = NULL; | ||
| 987 | unsigned int all_err_mask = 0; | ||
| 988 | int tag, is_io = 0; | ||
| 989 | u32 serror; | ||
| 990 | int rc; | ||
| 991 | |||
| 992 | DPRINTK("ENTER\n"); | ||
| 993 | |||
| 994 | /* obtain and analyze SError */ | ||
| 995 | rc = sata_scr_read(ap, SCR_ERROR, &serror); | ||
| 996 | if (rc == 0) { | ||
| 997 | ehc->i.serror |= serror; | ||
| 998 | ata_eh_analyze_serror(ap); | ||
| 999 | } else if (rc != -EOPNOTSUPP) | ||
| 1000 | action |= ATA_EH_HARDRESET; | ||
| 1001 | |||
| 1002 | /* any real error trumps AC_ERR_OTHER */ | ||
| 1003 | if (ehc->i.err_mask & ~AC_ERR_OTHER) | ||
| 1004 | ehc->i.err_mask &= ~AC_ERR_OTHER; | ||
| 1005 | |||
| 1006 | all_err_mask |= ehc->i.err_mask; | ||
| 1007 | |||
| 1008 | for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { | ||
| 1009 | struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); | ||
| 1010 | |||
| 1011 | if (!(qc->flags & ATA_QCFLAG_FAILED)) | ||
| 1012 | continue; | ||
| 1013 | |||
| 1014 | /* inherit upper level err_mask */ | ||
| 1015 | qc->err_mask |= ehc->i.err_mask; | ||
| 1016 | |||
| 1017 | if (qc->err_mask & AC_ERR_TIMEOUT) | ||
| 1018 | action |= ATA_EH_SOFTRESET; | ||
| 1019 | |||
| 1020 | /* analyze TF */ | ||
| 1021 | action |= ata_eh_analyze_tf(qc, &qc->result_tf); | ||
| 1022 | |||
| 1023 | /* DEV errors are probably spurious in case of ATA_BUS error */ | ||
| 1024 | if (qc->err_mask & AC_ERR_ATA_BUS) | ||
| 1025 | qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | | ||
| 1026 | AC_ERR_INVALID); | ||
| 1027 | |||
| 1028 | /* any real error trumps unknown error */ | ||
| 1029 | if (qc->err_mask & ~AC_ERR_OTHER) | ||
| 1030 | qc->err_mask &= ~AC_ERR_OTHER; | ||
| 1031 | |||
| 1032 | /* SENSE_VALID trumps dev/unknown error and revalidation */ | ||
| 1033 | if (qc->flags & ATA_QCFLAG_SENSE_VALID) { | ||
| 1034 | qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); | ||
| 1035 | action &= ~ATA_EH_REVALIDATE; | ||
| 1036 | } | ||
| 1037 | |||
| 1038 | /* accumulate error info */ | ||
| 1039 | failed_dev = qc->dev; | ||
| 1040 | all_err_mask |= qc->err_mask; | ||
| 1041 | if (qc->flags & ATA_QCFLAG_IO) | ||
| 1042 | is_io = 1; | ||
| 1043 | } | ||
| 1044 | |||
| 1045 | /* speed down iff command was in progress */ | ||
| 1046 | if (failed_dev) | ||
| 1047 | action |= ata_eh_speed_down(failed_dev, is_io, all_err_mask); | ||
| 1048 | |||
| 1049 | if (all_err_mask) | ||
| 1050 | action |= ATA_EH_REVALIDATE; | ||
| 1051 | |||
| 1052 | ehc->i.dev = failed_dev; | ||
| 1053 | ehc->i.action = action; | ||
| 1054 | |||
| 1055 | DPRINTK("EXIT\n"); | ||
| 1056 | } | ||
| 1057 | |||
| 1058 | /** | ||
| 1059 | * ata_eh_report - report error handling to user | ||
| 1060 | * @ap: ATA port EH is going on | ||
| 1061 | * | ||
| 1062 | * Report EH to user. | ||
| 1063 | * | ||
| 1064 | * LOCKING: | ||
| 1065 | * None. | ||
| 1066 | */ | ||
| 1067 | static void ata_eh_report(struct ata_port *ap) | ||
| 1068 | { | ||
| 1069 | struct ata_eh_context *ehc = &ap->eh_context; | ||
| 1070 | const char *frozen, *desc; | ||
| 1071 | int tag, nr_failed = 0; | ||
| 1072 | |||
| 1073 | desc = NULL; | ||
| 1074 | if (ehc->i.desc[0] != '\0') | ||
| 1075 | desc = ehc->i.desc; | ||
| 1076 | |||
| 1077 | for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { | ||
| 1078 | struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); | ||
| 1079 | |||
| 1080 | if (!(qc->flags & ATA_QCFLAG_FAILED)) | ||
| 1081 | continue; | ||
| 1082 | if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) | ||
| 1083 | continue; | ||
| 1084 | |||
| 1085 | nr_failed++; | ||
| 1086 | } | ||
| 1087 | |||
| 1088 | if (!nr_failed && !ehc->i.err_mask) | ||
| 1089 | return; | ||
| 1090 | |||
| 1091 | frozen = ""; | ||
| 1092 | if (ap->flags & ATA_FLAG_FROZEN) | ||
| 1093 | frozen = " frozen"; | ||
| 1094 | |||
| 1095 | if (ehc->i.dev) { | ||
| 1096 | ata_dev_printk(ehc->i.dev, KERN_ERR, | ||
| 1097 | "exception Emask 0x%x SErr 0x%x action 0x%x%s\n", | ||
| 1098 | ehc->i.err_mask, ehc->i.serror, ehc->i.action, | ||
| 1099 | frozen); | ||
| 1100 | if (desc) | ||
| 1101 | ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc); | ||
| 1102 | } else { | ||
| 1103 | ata_port_printk(ap, KERN_ERR, | ||
| 1104 | "exception Emask 0x%x SErr 0x%x action 0x%x%s\n", | ||
| 1105 | ehc->i.err_mask, ehc->i.serror, ehc->i.action, | ||
| 1106 | frozen); | ||
| 1107 | if (desc) | ||
| 1108 | ata_port_printk(ap, KERN_ERR, "(%s)\n", desc); | ||
| 1109 | } | ||
| 1110 | |||
| 1111 | for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { | ||
| 1112 | struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); | ||
| 1113 | |||
| 1114 | if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask) | ||
| 1115 | continue; | ||
| 1116 | |||
| 1117 | ata_dev_printk(qc->dev, KERN_ERR, "tag %d cmd 0x%x " | ||
| 1118 | "Emask 0x%x stat 0x%x err 0x%x (%s)\n", | ||
| 1119 | qc->tag, qc->tf.command, qc->err_mask, | ||
| 1120 | qc->result_tf.command, qc->result_tf.feature, | ||
| 1121 | ata_err_string(qc->err_mask)); | ||
| 1122 | } | ||
| 1123 | } | ||
| 1124 | |||
| 1125 | static int ata_eh_reset(struct ata_port *ap, ata_reset_fn_t softreset, | ||
| 1126 | ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) | ||
| 1127 | { | ||
| 1128 | struct ata_eh_context *ehc = &ap->eh_context; | ||
| 1129 | unsigned int classes[ATA_MAX_DEVICES]; | ||
| 1130 | int tries = ATA_EH_RESET_TRIES; | ||
| 1131 | ata_reset_fn_t reset; | ||
| 1132 | int rc; | ||
| 1133 | |||
| 1134 | if (softreset && (!hardreset || (!sata_set_spd_needed(ap) && | ||
| 1135 | !(ehc->i.action & ATA_EH_HARDRESET)))) | ||
| 1136 | reset = softreset; | ||
| 1137 | else | ||
| 1138 | reset = hardreset; | ||
| 1139 | |||
| 1140 | retry: | ||
| 1141 | ata_port_printk(ap, KERN_INFO, "%s resetting port\n", | ||
| 1142 | reset == softreset ? "soft" : "hard"); | ||
| 1143 | |||
| 1144 | /* reset */ | ||
| 1145 | ata_eh_about_to_do(ap, ATA_EH_RESET_MASK); | ||
| 1146 | ehc->i.flags |= ATA_EHI_DID_RESET; | ||
| 1147 | |||
| 1148 | rc = ata_do_reset(ap, reset, classes); | ||
| 1149 | |||
| 1150 | if (rc && --tries) { | ||
| 1151 | ata_port_printk(ap, KERN_WARNING, | ||
| 1152 | "%sreset failed, retrying in 5 secs\n", | ||
| 1153 | reset == softreset ? "soft" : "hard"); | ||
| 1154 | ssleep(5); | ||
| 1155 | |||
| 1156 | if (reset == hardreset) | ||
| 1157 | sata_down_spd_limit(ap); | ||
| 1158 | if (hardreset) | ||
| 1159 | reset = hardreset; | ||
| 1160 | goto retry; | ||
| 1161 | } | ||
| 1162 | |||
| 1163 | if (rc == 0) { | ||
| 1164 | if (postreset) | ||
| 1165 | postreset(ap, classes); | ||
| 1166 | |||
| 1167 | /* reset successful, schedule revalidation */ | ||
| 1168 | ehc->i.dev = NULL; | ||
| 1169 | ehc->i.action &= ~ATA_EH_RESET_MASK; | ||
| 1170 | ehc->i.action |= ATA_EH_REVALIDATE; | ||
| 1171 | } | ||
| 1172 | |||
| 1173 | return rc; | ||
| 1174 | } | ||
| 1175 | |||
| 1176 | static int ata_eh_revalidate(struct ata_port *ap, | ||
| 1177 | struct ata_device **r_failed_dev) | ||
| 1178 | { | ||
| 1179 | struct ata_eh_context *ehc = &ap->eh_context; | ||
| 1180 | struct ata_device *dev; | ||
| 1181 | int i, rc = 0; | ||
| 1182 | |||
| 1183 | DPRINTK("ENTER\n"); | ||
| 1184 | |||
| 1185 | for (i = 0; i < ATA_MAX_DEVICES; i++) { | ||
| 1186 | dev = &ap->device[i]; | ||
| 1187 | |||
| 1188 | if (ehc->i.action & ATA_EH_REVALIDATE && ata_dev_enabled(dev) && | ||
| 1189 | (!ehc->i.dev || ehc->i.dev == dev)) { | ||
| 1190 | if (ata_port_offline(ap)) { | ||
| 1191 | rc = -EIO; | ||
| 1192 | break; | ||
| 1193 | } | ||
| 1194 | |||
| 1195 | ata_eh_about_to_do(ap, ATA_EH_REVALIDATE); | ||
| 1196 | rc = ata_dev_revalidate(dev, | ||
| 1197 | ehc->i.flags & ATA_EHI_DID_RESET); | ||
| 1198 | if (rc) | ||
| 1199 | break; | ||
| 1200 | |||
| 1201 | ehc->i.action &= ~ATA_EH_REVALIDATE; | ||
| 1202 | } | ||
| 1203 | } | ||
| 1204 | |||
| 1205 | if (rc) | ||
| 1206 | *r_failed_dev = dev; | ||
| 1207 | |||
| 1208 | DPRINTK("EXIT\n"); | ||
| 1209 | return rc; | ||
| 1210 | } | ||
| 1211 | |||
| 1212 | static int ata_port_nr_enabled(struct ata_port *ap) | ||
| 1213 | { | ||
| 1214 | int i, cnt = 0; | ||
| 1215 | |||
| 1216 | for (i = 0; i < ATA_MAX_DEVICES; i++) | ||
| 1217 | if (ata_dev_enabled(&ap->device[i])) | ||
| 1218 | cnt++; | ||
| 1219 | return cnt; | ||
| 1220 | } | ||
| 1221 | |||
| 1222 | /** | ||
| 1223 | * ata_eh_recover - recover host port after error | ||
| 1224 | * @ap: host port to recover | ||
| 1225 | * @softreset: softreset method (can be NULL) | ||
| 1226 | * @hardreset: hardreset method (can be NULL) | ||
| 1227 | * @postreset: postreset method (can be NULL) | ||
| 1228 | * | ||
| 1229 | * This is the alpha and omega, eum and yang, heart and soul of | ||
| 1230 | * libata exception handling. On entry, actions required to | ||
| 1231 | * recover each devices are recorded in eh_context. This | ||
| 1232 | * function executes all the operations with appropriate retrials | ||
| 1233 | * and fallbacks to resurrect failed devices. | ||
| 1234 | * | ||
| 1235 | * LOCKING: | ||
| 1236 | * Kernel thread context (may sleep). | ||
| 1237 | * | ||
| 1238 | * RETURNS: | ||
| 1239 | * 0 on success, -errno on failure. | ||
| 1240 | */ | ||
| 1241 | static int ata_eh_recover(struct ata_port *ap, ata_reset_fn_t softreset, | ||
| 1242 | ata_reset_fn_t hardreset, | ||
| 1243 | ata_postreset_fn_t postreset) | ||
| 1244 | { | ||
| 1245 | struct ata_eh_context *ehc = &ap->eh_context; | ||
| 1246 | struct ata_device *dev; | ||
| 1247 | int down_xfermask, i, rc; | ||
| 1248 | |||
| 1249 | DPRINTK("ENTER\n"); | ||
| 1250 | |||
| 1251 | /* prep for recovery */ | ||
| 1252 | for (i = 0; i < ATA_MAX_DEVICES; i++) { | ||
| 1253 | dev = &ap->device[i]; | ||
| 1254 | |||
| 1255 | ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; | ||
| 1256 | } | ||
| 1257 | |||
| 1258 | retry: | ||
| 1259 | down_xfermask = 0; | ||
| 1260 | rc = 0; | ||
| 1261 | |||
| 1262 | /* skip EH if possible. */ | ||
| 1263 | if (!ata_port_nr_enabled(ap) && !(ap->flags & ATA_FLAG_FROZEN)) | ||
| 1264 | ehc->i.action = 0; | ||
| 1265 | |||
| 1266 | /* reset */ | ||
| 1267 | if (ehc->i.action & ATA_EH_RESET_MASK) { | ||
| 1268 | ata_eh_freeze_port(ap); | ||
| 1269 | |||
| 1270 | rc = ata_eh_reset(ap, softreset, hardreset, postreset); | ||
| 1271 | if (rc) { | ||
| 1272 | ata_port_printk(ap, KERN_ERR, | ||
| 1273 | "reset failed, giving up\n"); | ||
| 1274 | goto out; | ||
| 1275 | } | ||
| 1276 | |||
| 1277 | ata_eh_thaw_port(ap); | ||
| 1278 | } | ||
| 1279 | |||
| 1280 | /* revalidate existing devices */ | ||
| 1281 | rc = ata_eh_revalidate(ap, &dev); | ||
| 1282 | if (rc) | ||
| 1283 | goto dev_fail; | ||
| 1284 | |||
| 1285 | /* configure transfer mode if the port has been reset */ | ||
| 1286 | if (ehc->i.flags & ATA_EHI_DID_RESET) { | ||
| 1287 | rc = ata_set_mode(ap, &dev); | ||
| 1288 | if (rc) { | ||
| 1289 | down_xfermask = 1; | ||
| 1290 | goto dev_fail; | ||
| 1291 | } | ||
| 1292 | } | ||
| 1293 | |||
| 1294 | goto out; | ||
| 1295 | |||
| 1296 | dev_fail: | ||
| 1297 | switch (rc) { | ||
| 1298 | case -ENODEV: | ||
| 1299 | case -EINVAL: | ||
| 1300 | ehc->tries[dev->devno] = 0; | ||
| 1301 | break; | ||
| 1302 | case -EIO: | ||
| 1303 | sata_down_spd_limit(ap); | ||
| 1304 | default: | ||
| 1305 | ehc->tries[dev->devno]--; | ||
| 1306 | if (down_xfermask && | ||
| 1307 | ata_down_xfermask_limit(dev, ehc->tries[dev->devno] == 1)) | ||
| 1308 | ehc->tries[dev->devno] = 0; | ||
| 1309 | } | ||
| 1310 | |||
| 1311 | /* disable device if it has used up all its chances */ | ||
| 1312 | if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) | ||
| 1313 | ata_dev_disable(dev); | ||
| 1314 | |||
| 1315 | /* soft didn't work? be haaaaard */ | ||
| 1316 | if (ehc->i.flags & ATA_EHI_DID_RESET) | ||
| 1317 | ehc->i.action |= ATA_EH_HARDRESET; | ||
| 1318 | else | ||
| 1319 | ehc->i.action |= ATA_EH_SOFTRESET; | ||
| 1320 | |||
| 1321 | if (ata_port_nr_enabled(ap)) { | ||
| 1322 | ata_port_printk(ap, KERN_WARNING, "failed to recover some " | ||
| 1323 | "devices, retrying in 5 secs\n"); | ||
| 1324 | ssleep(5); | ||
| 1325 | } else { | ||
| 1326 | /* no device left, repeat fast */ | ||
| 1327 | msleep(500); | ||
| 1328 | } | ||
| 1329 | |||
| 1330 | goto retry; | ||
| 1331 | |||
| 1332 | out: | ||
| 1333 | if (rc) { | ||
| 1334 | for (i = 0; i < ATA_MAX_DEVICES; i++) | ||
| 1335 | ata_dev_disable(&ap->device[i]); | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | DPRINTK("EXIT, rc=%d\n", rc); | ||
| 1339 | return rc; | ||
| 1340 | } | ||
| 1341 | |||
| 1342 | /** | ||
| 1343 | * ata_eh_finish - finish up EH | ||
| 1344 | * @ap: host port to finish EH for | ||
| 1345 | * | ||
| 1346 | * Recovery is complete. Clean up EH states and retry or finish | ||
| 1347 | * failed qcs. | ||
| 1348 | * | ||
| 1349 | * LOCKING: | ||
| 1350 | * None. | ||
| 1351 | */ | ||
| 1352 | static void ata_eh_finish(struct ata_port *ap) | ||
| 1353 | { | ||
| 1354 | int tag; | ||
| 1355 | |||
| 1356 | /* retry or finish qcs */ | ||
| 1357 | for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { | ||
| 1358 | struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); | ||
| 1359 | |||
| 1360 | if (!(qc->flags & ATA_QCFLAG_FAILED)) | ||
| 1361 | continue; | ||
| 1362 | |||
| 1363 | if (qc->err_mask) { | ||
| 1364 | /* FIXME: Once EH migration is complete, | ||
| 1365 | * generate sense data in this function, | ||
| 1366 | * considering both err_mask and tf. | ||
| 1367 | */ | ||
| 1368 | if (qc->err_mask & AC_ERR_INVALID) | ||
| 1369 | ata_eh_qc_complete(qc); | ||
| 1370 | else | ||
| 1371 | ata_eh_qc_retry(qc); | ||
| 1372 | } else { | ||
| 1373 | if (qc->flags & ATA_QCFLAG_SENSE_VALID) { | ||
| 1374 | ata_eh_qc_complete(qc); | ||
| 1375 | } else { | ||
| 1376 | /* feed zero TF to sense generation */ | ||
| 1377 | memset(&qc->result_tf, 0, sizeof(qc->result_tf)); | ||
| 1378 | ata_eh_qc_retry(qc); | ||
| 1379 | } | ||
| 1380 | } | ||
| 1381 | } | ||
| 1382 | } | ||
| 1383 | |||
| 1384 | /** | ||
| 1385 | * ata_do_eh - do standard error handling | ||
| 1386 | * @ap: host port to handle error for | ||
| 1387 | * @softreset: softreset method (can be NULL) | ||
| 1388 | * @hardreset: hardreset method (can be NULL) | ||
| 1389 | * @postreset: postreset method (can be NULL) | ||
| 1390 | * | ||
| 1391 | * Perform standard error handling sequence. | ||
| 1392 | * | ||
| 1393 | * LOCKING: | ||
| 1394 | * Kernel thread context (may sleep). | ||
| 1395 | */ | ||
| 1396 | void ata_do_eh(struct ata_port *ap, ata_reset_fn_t softreset, | ||
| 1397 | ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) | ||
| 1398 | { | ||
| 1399 | ata_eh_autopsy(ap); | ||
| 1400 | ata_eh_report(ap); | ||
| 1401 | ata_eh_recover(ap, softreset, hardreset, postreset); | ||
| 1402 | ata_eh_finish(ap); | ||
| 1403 | } | ||
diff --git a/include/linux/libata.h b/include/linux/libata.h index 298f9918e375..9fe46073cf8c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h | |||
| @@ -247,6 +247,8 @@ enum { | |||
| 247 | 247 | ||
| 248 | /* how hard are we gonna try to probe/recover devices */ | 248 | /* how hard are we gonna try to probe/recover devices */ |
| 249 | ATA_PROBE_MAX_TRIES = 3, | 249 | ATA_PROBE_MAX_TRIES = 3, |
| 250 | ATA_EH_RESET_TRIES = 3, | ||
| 251 | ATA_EH_DEV_TRIES = 3, | ||
| 250 | }; | 252 | }; |
| 251 | 253 | ||
| 252 | enum hsm_task_states { | 254 | enum hsm_task_states { |
| @@ -727,6 +729,9 @@ extern void ata_eh_thaw_port(struct ata_port *ap); | |||
| 727 | extern void ata_eh_qc_complete(struct ata_queued_cmd *qc); | 729 | extern void ata_eh_qc_complete(struct ata_queued_cmd *qc); |
| 728 | extern void ata_eh_qc_retry(struct ata_queued_cmd *qc); | 730 | extern void ata_eh_qc_retry(struct ata_queued_cmd *qc); |
| 729 | 731 | ||
| 732 | extern void ata_do_eh(struct ata_port *ap, ata_reset_fn_t softreset, | ||
| 733 | ata_reset_fn_t hardreset, ata_postreset_fn_t postreset); | ||
| 734 | |||
| 730 | /* | 735 | /* |
| 731 | * printk helpers | 736 | * printk helpers |
| 732 | */ | 737 | */ |
