aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYaniv Gardi <ygardi@codeaurora.org>2016-03-10 10:37:13 -0500
committerMartin K. Petersen <martin.petersen@oracle.com>2016-03-14 21:04:45 -0400
commit583fa62d082483412715af9ab4f528fcf00e4c38 (patch)
tree88d11be7de4bbb06e6f2635fb3063b87ed0b394b
parent9a47ec7c390e819d2ca61f7a55d16412f168b674 (diff)
scsi: ufs: add error recovery after DL NAC error
Some vendors' UFS devices send back-to-back NACs for the DL data frames, causing the host controller to raise the DFES error status. Sometimes such UFS devices send back-to-back NACs without waiting for the new retransmitted DL frame from the host, and in such cases it is possible that the Host UniPro goes into a bad state without raising the DFES error interrupt. If this happens, all the pending commands would time out only after their respective SW command timeouts (which are generally too long). This change works around such device behaviour as follows: - As soon as SW sees the DL NAC error, it schedules the error handler. - The error handler sleeps for 50ms to see if there are any fatal errors raised by the UFS controller. - If there are fatal errors, SW does normal error recovery. - If there are no fatal errors, SW sends the NOP command to the device to check if the link is alive. - If the NOP command times out, SW does normal error recovery. - If the NOP command succeeds, SW skips the error handling. If the DL NAC error is seen multiple times with some vendor's UFS devices, enable this quirk to initiate quick error recovery and also to silence the related error logs, reducing spamming of the kernel logs. Reviewed-by: Hannes Reinecke <hare@suse.de> Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org> Signed-off-by: Yaniv Gardi <ygardi@codeaurora.org> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
-rw-r--r--drivers/scsi/ufs/ufshcd.c93
-rw-r--r--drivers/scsi/ufs/ufshci.h2
2 files changed, 95 insertions, 0 deletions
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index adaae345b7c1..4eedb7fafa95 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3795,6 +3795,79 @@ static void ufshcd_complete_requests(struct ufs_hba *hba)
3795} 3795}
3796 3796
3797/** 3797/**
3798 * ufshcd_quirk_dl_nac_errors - check whether full error handling is still
3799 * required after the recorded errors, giving a lone DL NAC error special care.
3800 * @hba: per-adapter instance
3801 *
3802 * Returns true if error handling is required, false otherwise. May sleep and acquires host_lock itself, so call it without host_lock held.
3803 */
3804static bool ufshcd_quirk_dl_nac_errors(struct ufs_hba *hba)
3805{
3806 unsigned long flags;
3807 bool err_handling = true;
3808
3809 spin_lock_irqsave(hba->host->host_lock, flags);
3810 /*
3811 * Controller/system bus fatal errors, a device fatal error and a DL TCx
3812 * replay error all keep err_handling true; only a DL NAC error is examined further.
3813 */
3814 if (hba->saved_err & (CONTROLLER_FATAL_ERROR | SYSTEM_BUS_FATAL_ERROR))
3815 goto out;
3816
3817 if ((hba->saved_err & DEVICE_FATAL_ERROR) ||
3818 ((hba->saved_err & UIC_ERROR) &&
3819 (hba->saved_uic_err & UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))
3820 goto out;
3821
3822 if ((hba->saved_err & UIC_ERROR) &&
3823 (hba->saved_uic_err & UFSHCD_UIC_DL_NAC_RECEIVED_ERROR)) {
3824 int err;
3825 /*
3826 * Drop the lock and sleep for 50ms to see whether any other errors show up.
3827 */
3828 spin_unlock_irqrestore(hba->host->host_lock, flags);
3829 msleep(50);
3830 spin_lock_irqsave(hba->host->host_lock, flags);
3831
3832 /*
3833 * Now check whether any severe error other than the DL NAC error
3834 * has been recorded; if so, normal error handling is required.
3835 */
3836 if ((hba->saved_err & INT_FATAL_ERRORS) ||
3837 ((hba->saved_err & UIC_ERROR) &&
3838 (hba->saved_uic_err & ~UFSHCD_UIC_DL_NAC_RECEIVED_ERROR)))
3839 goto out;
3840
3841 /*
3842 * As DL NAC is the only error received so far, send out a NOP
3843 * command (with the lock dropped) to confirm whether the link is still active.
3844 * - If we don't get any response then do error recovery.
3845 * - If we get a response then clear the DL NAC error bit.
3846 */
3847
3848 spin_unlock_irqrestore(hba->host->host_lock, flags);
3849 err = ufshcd_verify_dev_init(hba);
3850 spin_lock_irqsave(hba->host->host_lock, flags);
3851
3852 if (err)
3853 goto out;
3854
3855 /* Link seems alive: drop UIC_ERROR only if DL NAC is the sole UIC error */
3856 if (hba->saved_uic_err == UFSHCD_UIC_DL_NAC_RECEIVED_ERROR)
3857 hba->saved_err &= ~UIC_ERROR;
3858 /* clear NAC error; skip error handling if no UIC error remains */
3859 hba->saved_uic_err &= ~UFSHCD_UIC_DL_NAC_RECEIVED_ERROR;
3860 if (!hba->saved_uic_err) {
3861 err_handling = false;
3862 goto out;
3863 }
3864 }
3865out:
3866 spin_unlock_irqrestore(hba->host->host_lock, flags);
3867 return err_handling;
3868}
3869
3870/**
3798 * ufshcd_err_handler - handle UFS errors that require s/w attention 3871 * ufshcd_err_handler - handle UFS errors that require s/w attention
3799 * @work: pointer to work structure 3872 * @work: pointer to work structure
3800 */ 3873 */
@@ -3822,6 +3895,17 @@ static void ufshcd_err_handler(struct work_struct *work)
3822 3895
3823 /* Complete requests that have door-bell cleared by h/w */ 3896 /* Complete requests that have door-bell cleared by h/w */
3824 ufshcd_complete_requests(hba); 3897 ufshcd_complete_requests(hba);
3898
3899 if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) {
3900 bool ret;
3901
3902 spin_unlock_irqrestore(hba->host->host_lock, flags);
3903 /* release the lock as ufshcd_quirk_dl_nac_errors() may sleep */
3904 ret = ufshcd_quirk_dl_nac_errors(hba);
3905 spin_lock_irqsave(hba->host->host_lock, flags);
3906 if (!ret)
3907 goto skip_err_handling;
3908 }
3825 if ((hba->saved_err & INT_FATAL_ERRORS) || 3909 if ((hba->saved_err & INT_FATAL_ERRORS) ||
3826 ((hba->saved_err & UIC_ERROR) && 3910 ((hba->saved_err & UIC_ERROR) &&
3827 (hba->saved_uic_err & (UFSHCD_UIC_DL_PA_INIT_ERROR | 3911 (hba->saved_uic_err & (UFSHCD_UIC_DL_PA_INIT_ERROR |
@@ -3897,6 +3981,7 @@ skip_pending_xfer_clear:
3897 hba->saved_uic_err = 0; 3981 hba->saved_uic_err = 0;
3898 } 3982 }
3899 3983
3984skip_err_handling:
3900 if (!needs_reset) { 3985 if (!needs_reset) {
3901 hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; 3986 hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
3902 if (hba->saved_err || hba->saved_uic_err) 3987 if (hba->saved_err || hba->saved_uic_err)
@@ -3925,6 +4010,14 @@ static void ufshcd_update_uic_error(struct ufs_hba *hba)
3925 reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER); 4010 reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
3926 if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT) 4011 if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
3927 hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR; 4012 hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
4013 else if (hba->dev_quirks &
4014 UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) {
4015 if (reg & UIC_DATA_LINK_LAYER_ERROR_NAC_RECEIVED)
4016 hba->uic_error |=
4017 UFSHCD_UIC_DL_NAC_RECEIVED_ERROR;
4018 else if (reg & UIC_DATA_LINK_LAYER_ERROR_TCx_REPLAY_TIMEOUT)
4019 hba->uic_error |= UFSHCD_UIC_DL_TCx_REPLAY_ERROR;
4020 }
3928 4021
3929 /* UIC NL/TL/DME errors needs software retry */ 4022 /* UIC NL/TL/DME errors needs software retry */
3930 reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER); 4023 reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h
index 0ae0967aaed8..2b05bfb6b3d7 100644
--- a/drivers/scsi/ufs/ufshci.h
+++ b/drivers/scsi/ufs/ufshci.h
@@ -170,6 +170,8 @@ enum {
170#define UIC_DATA_LINK_LAYER_ERROR UFS_BIT(31) 170#define UIC_DATA_LINK_LAYER_ERROR UFS_BIT(31)
171#define UIC_DATA_LINK_LAYER_ERROR_CODE_MASK 0x7FFF 171#define UIC_DATA_LINK_LAYER_ERROR_CODE_MASK 0x7FFF
172#define UIC_DATA_LINK_LAYER_ERROR_PA_INIT 0x2000 172#define UIC_DATA_LINK_LAYER_ERROR_PA_INIT 0x2000
173#define UIC_DATA_LINK_LAYER_ERROR_NAC_RECEIVED 0x0001
174#define UIC_DATA_LINK_LAYER_ERROR_TCx_REPLAY_TIMEOUT 0x0002
173 175
174/* UECN - Host UIC Error Code Network Layer 40h */ 176/* UECN - Host UIC Error Code Network Layer 40h */
175#define UIC_NETWORK_LAYER_ERROR UFS_BIT(31) 177#define UIC_NETWORK_LAYER_ERROR UFS_BIT(31)