aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSujit Reddy Thumma <sthumma@codeaurora.org>2014-05-26 01:29:15 -0400
committerChristoph Hellwig <hch@lst.de>2014-05-28 06:25:13 -0400
commite8e7f27139d5ba5b9f05fdee3f6224116ce8427c (patch)
treea8e3489ba489593086256f2d996289d9d151c7f5
parent3441da7ddbdedf91bfd5cc8609c571ffc24942dd (diff)
scsi: ufs: Improve UFS fatal error handling
Error handling in UFS driver is broken and resets the host controller for fatal errors without re-initialization. Correct the fatal error handling sequence according to UFS Host Controller Interface (HCI) v1.1 specification. o Processed requests which are completed w/wo error are reported to SCSI layer and any pending commands that are not started are aborted in the controller and re-queued into scsi mid-layer queue. o Upon determining fatal error condition the host controller may hang forever until a reset is applied. Block SCSI layer for sending new requests and apply reset in a separate error handling work. o SCSI is informed about the expected Unit-Attention exception from the device for the immediate command after a reset so that the SCSI layer takes necessary steps to establish communication with the device. Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org> Reviewed-by: Yaniv Gardi <ygardi@codeaurora.org> Tested-by: Dolev Raviv <draviv@codeaurora.org> Acked-by: Vinayak Holikatti <vinholikatti@gmail.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--drivers/scsi/ufs/ufshcd.c229
-rw-r--r--drivers/scsi/ufs/ufshcd.h10
2 files changed, 149 insertions, 90 deletions
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 54623104f262..0c2877251251 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -84,6 +84,14 @@ enum {
84 UFSHCD_EH_IN_PROGRESS = (1 << 0), 84 UFSHCD_EH_IN_PROGRESS = (1 << 0),
85}; 85};
86 86
87/* UFSHCD UIC layer error flags */
88enum {
89 UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */
90 UFSHCD_UIC_NL_ERROR = (1 << 1), /* Network layer error */
91 UFSHCD_UIC_TL_ERROR = (1 << 2), /* Transport Layer error */
92 UFSHCD_UIC_DME_ERROR = (1 << 3), /* DME error */
93};
94
87/* Interrupt configuration options */ 95/* Interrupt configuration options */
88enum { 96enum {
89 UFSHCD_INT_DISABLE, 97 UFSHCD_INT_DISABLE,
@@ -100,6 +108,8 @@ enum {
100 108
101static void ufshcd_tmc_handler(struct ufs_hba *hba); 109static void ufshcd_tmc_handler(struct ufs_hba *hba);
102static void ufshcd_async_scan(void *data, async_cookie_t cookie); 110static void ufshcd_async_scan(void *data, async_cookie_t cookie);
111static int ufshcd_reset_and_restore(struct ufs_hba *hba);
112static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag);
103 113
104/* 114/*
105 * ufshcd_wait_for_register - wait for register value to change 115 * ufshcd_wait_for_register - wait for register value to change
@@ -1735,9 +1745,6 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba)
1735 goto out; 1745 goto out;
1736 } 1746 }
1737 1747
1738 if (hba->ufshcd_state == UFSHCD_STATE_RESET)
1739 scsi_unblock_requests(hba->host);
1740
1741out: 1748out:
1742 return err; 1749 return err;
1743} 1750}
@@ -1863,66 +1870,6 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba)
1863} 1870}
1864 1871
1865/** 1872/**
1866 * ufshcd_do_reset - reset the host controller
1867 * @hba: per adapter instance
1868 *
1869 * Returns SUCCESS/FAILED
1870 */
1871static int ufshcd_do_reset(struct ufs_hba *hba)
1872{
1873 struct ufshcd_lrb *lrbp;
1874 unsigned long flags;
1875 int tag;
1876
1877 /* block commands from midlayer */
1878 scsi_block_requests(hba->host);
1879
1880 spin_lock_irqsave(hba->host->host_lock, flags);
1881 hba->ufshcd_state = UFSHCD_STATE_RESET;
1882
1883 /* send controller to reset state */
1884 ufshcd_hba_stop(hba);
1885 spin_unlock_irqrestore(hba->host->host_lock, flags);
1886
1887 /* abort outstanding commands */
1888 for (tag = 0; tag < hba->nutrs; tag++) {
1889 if (test_bit(tag, &hba->outstanding_reqs)) {
1890 lrbp = &hba->lrb[tag];
1891 if (lrbp->cmd) {
1892 scsi_dma_unmap(lrbp->cmd);
1893 lrbp->cmd->result = DID_RESET << 16;
1894 lrbp->cmd->scsi_done(lrbp->cmd);
1895 lrbp->cmd = NULL;
1896 clear_bit_unlock(tag, &hba->lrb_in_use);
1897 }
1898 }
1899 }
1900
1901 /* complete device management command */
1902 if (hba->dev_cmd.complete)
1903 complete(hba->dev_cmd.complete);
1904
1905 /* clear outstanding request/task bit maps */
1906 hba->outstanding_reqs = 0;
1907 hba->outstanding_tasks = 0;
1908
1909 /* Host controller enable */
1910 if (ufshcd_hba_enable(hba)) {
1911 dev_err(hba->dev,
1912 "Reset: Controller initialization failed\n");
1913 return FAILED;
1914 }
1915
1916 if (ufshcd_link_startup(hba)) {
1917 dev_err(hba->dev,
1918 "Reset: Link start-up failed\n");
1919 return FAILED;
1920 }
1921
1922 return SUCCESS;
1923}
1924
1925/**
1926 * ufshcd_slave_alloc - handle initial SCSI device configurations 1873 * ufshcd_slave_alloc - handle initial SCSI device configurations
1927 * @sdev: pointer to SCSI device 1874 * @sdev: pointer to SCSI device
1928 * 1875 *
@@ -1939,6 +1886,9 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev)
1939 sdev->use_10_for_ms = 1; 1886 sdev->use_10_for_ms = 1;
1940 scsi_set_tag_type(sdev, MSG_SIMPLE_TAG); 1887 scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
1941 1888
1889 /* allow SCSI layer to restart the device in case of errors */
1890 sdev->allow_restart = 1;
1891
1942 /* 1892 /*
1943 * Inform SCSI Midlayer that the LUN queue depth is same as the 1893 * Inform SCSI Midlayer that the LUN queue depth is same as the
1944 * controller queue depth. If a LUN queue depth is less than the 1894 * controller queue depth. If a LUN queue depth is less than the
@@ -2134,6 +2084,9 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
2134 case OCS_ABORTED: 2084 case OCS_ABORTED:
2135 result |= DID_ABORT << 16; 2085 result |= DID_ABORT << 16;
2136 break; 2086 break;
2087 case OCS_INVALID_COMMAND_STATUS:
2088 result |= DID_REQUEUE << 16;
2089 break;
2137 case OCS_INVALID_CMD_TABLE_ATTR: 2090 case OCS_INVALID_CMD_TABLE_ATTR:
2138 case OCS_INVALID_PRDT_ATTR: 2091 case OCS_INVALID_PRDT_ATTR:
2139 case OCS_MISMATCH_DATA_BUF_SIZE: 2092 case OCS_MISMATCH_DATA_BUF_SIZE:
@@ -2451,45 +2404,145 @@ out:
2451} 2404}
2452 2405
2453/** 2406/**
2454 * ufshcd_fatal_err_handler - handle fatal errors 2407 * ufshcd_err_handler - handle UFS errors that require s/w attention
2455 * @hba: per adapter instance 2408 * @work: pointer to work structure
2456 */ 2409 */
2457static void ufshcd_fatal_err_handler(struct work_struct *work) 2410static void ufshcd_err_handler(struct work_struct *work)
2458{ 2411{
2459 struct ufs_hba *hba; 2412 struct ufs_hba *hba;
2460 hba = container_of(work, struct ufs_hba, feh_workq); 2413 unsigned long flags;
2414 u32 err_xfer = 0;
2415 u32 err_tm = 0;
2416 int err = 0;
2417 int tag;
2418
2419 hba = container_of(work, struct ufs_hba, eh_work);
2461 2420
2462 pm_runtime_get_sync(hba->dev); 2421 pm_runtime_get_sync(hba->dev);
2463 /* check if reset is already in progress */ 2422
2464 if (hba->ufshcd_state != UFSHCD_STATE_RESET) 2423 spin_lock_irqsave(hba->host->host_lock, flags);
2465 ufshcd_do_reset(hba); 2424 if (hba->ufshcd_state == UFSHCD_STATE_RESET) {
2425 spin_unlock_irqrestore(hba->host->host_lock, flags);
2426 goto out;
2427 }
2428
2429 hba->ufshcd_state = UFSHCD_STATE_RESET;
2430 ufshcd_set_eh_in_progress(hba);
2431
2432 /* Complete requests that have door-bell cleared by h/w */
2433 ufshcd_transfer_req_compl(hba);
2434 ufshcd_tmc_handler(hba);
2435 spin_unlock_irqrestore(hba->host->host_lock, flags);
2436
2437 /* Clear pending transfer requests */
2438 for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs)
2439 if (ufshcd_clear_cmd(hba, tag))
2440 err_xfer |= 1 << tag;
2441
2442 /* Clear pending task management requests */
2443 for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs)
2444 if (ufshcd_clear_tm_cmd(hba, tag))
2445 err_tm |= 1 << tag;
2446
2447 /* Complete the requests that are cleared by s/w */
2448 spin_lock_irqsave(hba->host->host_lock, flags);
2449 ufshcd_transfer_req_compl(hba);
2450 ufshcd_tmc_handler(hba);
2451 spin_unlock_irqrestore(hba->host->host_lock, flags);
2452
2453 /* Fatal errors need reset */
2454 if (err_xfer || err_tm || (hba->saved_err & INT_FATAL_ERRORS) ||
2455 ((hba->saved_err & UIC_ERROR) &&
2456 (hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR))) {
2457 err = ufshcd_reset_and_restore(hba);
2458 if (err) {
2459 dev_err(hba->dev, "%s: reset and restore failed\n",
2460 __func__);
2461 hba->ufshcd_state = UFSHCD_STATE_ERROR;
2462 }
2463 /*
2464 * Inform scsi mid-layer that we did reset and allow to handle
2465 * Unit Attention properly.
2466 */
2467 scsi_report_bus_reset(hba->host, 0);
2468 hba->saved_err = 0;
2469 hba->saved_uic_err = 0;
2470 }
2471 ufshcd_clear_eh_in_progress(hba);
2472
2473out:
2474 scsi_unblock_requests(hba->host);
2466 pm_runtime_put_sync(hba->dev); 2475 pm_runtime_put_sync(hba->dev);
2467} 2476}
2468 2477
2469/** 2478/**
2470 * ufshcd_err_handler - Check for fatal errors 2479 * ufshcd_update_uic_error - check and set fatal UIC error flags.
2471 * @work: pointer to a work queue structure 2480 * @hba: per-adapter instance
2472 */ 2481 */
2473static void ufshcd_err_handler(struct ufs_hba *hba) 2482static void ufshcd_update_uic_error(struct ufs_hba *hba)
2474{ 2483{
2475 u32 reg; 2484 u32 reg;
2476 2485
2486 /* PA_INIT_ERROR is fatal and needs UIC reset */
2487 reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
2488 if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
2489 hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
2490
2491 /* UIC NL/TL/DME errors needs software retry */
2492 reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
2493 if (reg)
2494 hba->uic_error |= UFSHCD_UIC_NL_ERROR;
2495
2496 reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_TRANSPORT_LAYER);
2497 if (reg)
2498 hba->uic_error |= UFSHCD_UIC_TL_ERROR;
2499
2500 reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DME);
2501 if (reg)
2502 hba->uic_error |= UFSHCD_UIC_DME_ERROR;
2503
2504 dev_dbg(hba->dev, "%s: UIC error flags = 0x%08x\n",
2505 __func__, hba->uic_error);
2506}
2507
2508/**
2509 * ufshcd_check_errors - Check for errors that need s/w attention
2510 * @hba: per-adapter instance
2511 */
2512static void ufshcd_check_errors(struct ufs_hba *hba)
2513{
2514 bool queue_eh_work = false;
2515
2477 if (hba->errors & INT_FATAL_ERRORS) 2516 if (hba->errors & INT_FATAL_ERRORS)
2478 goto fatal_eh; 2517 queue_eh_work = true;
2479 2518
2480 if (hba->errors & UIC_ERROR) { 2519 if (hba->errors & UIC_ERROR) {
2481 reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER); 2520 hba->uic_error = 0;
2482 if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT) 2521 ufshcd_update_uic_error(hba);
2483 goto fatal_eh; 2522 if (hba->uic_error)
2523 queue_eh_work = true;
2484 } 2524 }
2485 return; 2525
2486fatal_eh: 2526 if (queue_eh_work) {
2487 /* handle fatal errors only when link is functional */ 2527 /* handle fatal errors only when link is functional */
2488 if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) { 2528 if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
2489 /* block commands at driver layer until error is handled */ 2529 /* block commands from scsi mid-layer */
2490 hba->ufshcd_state = UFSHCD_STATE_ERROR; 2530 scsi_block_requests(hba->host);
2491 schedule_work(&hba->feh_workq); 2531
2532 /* transfer error masks to sticky bits */
2533 hba->saved_err |= hba->errors;
2534 hba->saved_uic_err |= hba->uic_error;
2535
2536 hba->ufshcd_state = UFSHCD_STATE_ERROR;
2537 schedule_work(&hba->eh_work);
2538 }
2492 } 2539 }
2540 /*
2541 * if (!queue_eh_work) -
2542 * Other errors are either non-fatal where host recovers
2543 * itself without s/w intervention or errors that will be
2544 * handled by the SCSI core layer.
2545 */
2493} 2546}
2494 2547
2495/** 2548/**
@@ -2514,7 +2567,7 @@ static void ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status)
2514{ 2567{
2515 hba->errors = UFSHCD_ERROR_MASK & intr_status; 2568 hba->errors = UFSHCD_ERROR_MASK & intr_status;
2516 if (hba->errors) 2569 if (hba->errors)
2517 ufshcd_err_handler(hba); 2570 ufshcd_check_errors(hba);
2518 2571
2519 if (intr_status & UFSHCD_UIC_MASK) 2572 if (intr_status & UFSHCD_UIC_MASK)
2520 ufshcd_uic_cmd_compl(hba, intr_status); 2573 ufshcd_uic_cmd_compl(hba, intr_status);
@@ -2889,12 +2942,12 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
2889 */ 2942 */
2890 do { 2943 do {
2891 spin_lock_irqsave(hba->host->host_lock, flags); 2944 spin_lock_irqsave(hba->host->host_lock, flags);
2892 if (!(work_pending(&hba->feh_workq) || 2945 if (!(work_pending(&hba->eh_work) ||
2893 hba->ufshcd_state == UFSHCD_STATE_RESET)) 2946 hba->ufshcd_state == UFSHCD_STATE_RESET))
2894 break; 2947 break;
2895 spin_unlock_irqrestore(hba->host->host_lock, flags); 2948 spin_unlock_irqrestore(hba->host->host_lock, flags);
2896 dev_dbg(hba->dev, "%s: reset in progress\n", __func__); 2949 dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
2897 flush_work(&hba->feh_workq); 2950 flush_work(&hba->eh_work);
2898 } while (1); 2951 } while (1);
2899 2952
2900 hba->ufshcd_state = UFSHCD_STATE_RESET; 2953 hba->ufshcd_state = UFSHCD_STATE_RESET;
@@ -3130,7 +3183,7 @@ int ufshcd_init(struct device *dev, struct ufs_hba **hba_handle,
3130 init_waitqueue_head(&hba->tm_tag_wq); 3183 init_waitqueue_head(&hba->tm_tag_wq);
3131 3184
3132 /* Initialize work queues */ 3185 /* Initialize work queues */
3133 INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler); 3186 INIT_WORK(&hba->eh_work, ufshcd_err_handler);
3134 INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler); 3187 INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler);
3135 3188
3136 /* Initialize UIC command mutex */ 3189 /* Initialize UIC command mutex */
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 48c7d9b587f7..acf318e338ed 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -183,9 +183,12 @@ struct ufs_dev_cmd {
183 * @eh_flags: Error handling flags 183 * @eh_flags: Error handling flags
184 * @intr_mask: Interrupt Mask Bits 184 * @intr_mask: Interrupt Mask Bits
185 * @ee_ctrl_mask: Exception event control mask 185 * @ee_ctrl_mask: Exception event control mask
186 * @feh_workq: Work queue for fatal controller error handling 186 * @eh_work: Worker to handle UFS errors that require s/w attention
187 * @eeh_work: Worker to handle exception events 187 * @eeh_work: Worker to handle exception events
188 * @errors: HBA errors 188 * @errors: HBA errors
189 * @uic_error: UFS interconnect layer error status
190 * @saved_err: sticky error mask
191 * @saved_uic_err: sticky UIC error mask
189 * @dev_cmd: ufs device management command information 192 * @dev_cmd: ufs device management command information
190 * @auto_bkops_enabled: to track whether bkops is enabled in device 193 * @auto_bkops_enabled: to track whether bkops is enabled in device
191 */ 194 */
@@ -233,11 +236,14 @@ struct ufs_hba {
233 u16 ee_ctrl_mask; 236 u16 ee_ctrl_mask;
234 237
235 /* Work Queues */ 238 /* Work Queues */
236 struct work_struct feh_workq; 239 struct work_struct eh_work;
237 struct work_struct eeh_work; 240 struct work_struct eeh_work;
238 241
239 /* HBA Errors */ 242 /* HBA Errors */
240 u32 errors; 243 u32 errors;
244 u32 uic_error;
245 u32 saved_err;
246 u32 saved_uic_err;
241 247
242 /* Device management request data */ 248 /* Device management request data */
243 struct ufs_dev_cmd dev_cmd; 249 struct ufs_dev_cmd dev_cmd;