diff options
author | Sujit Reddy Thumma <sthumma@codeaurora.org> | 2014-05-26 01:29:15 -0400 |
---|---|---|
committer | Christoph Hellwig <hch@lst.de> | 2014-05-28 06:25:13 -0400 |
commit | e8e7f27139d5ba5b9f05fdee3f6224116ce8427c (patch) | |
tree | a8e3489ba489593086256f2d996289d9d151c7f5 | |
parent | 3441da7ddbdedf91bfd5cc8609c571ffc24942dd (diff) |
scsi: ufs: Improve UFS fatal error handling
Error handling in UFS driver is broken and resets the host controller
for fatal errors without re-initialization. Correct the fatal error
handling sequence according to UFS Host Controller Interface (HCI)
v1.1 specification.
o Processed requests which are completed with or without error are
  reported to the SCSI layer, and any pending commands that are not
  started are aborted in the controller and re-queued into the SCSI
  mid-layer queue.
o Upon determining fatal error condition the host controller may hang
forever until a reset is applied. Block SCSI layer for sending new
requests and apply reset in a separate error handling work.
o SCSI is informed about the expected Unit-Attention exception from the
  device for the immediate command after a reset so that the SCSI layer
  takes the necessary steps to establish communication with the device.
Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Reviewed-by: Yaniv Gardi <ygardi@codeaurora.org>
Tested-by: Dolev Raviv <draviv@codeaurora.org>
Acked-by: Vinayak Holikatti <vinholikatti@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
-rw-r--r-- | drivers/scsi/ufs/ufshcd.c | 229 | ||||
-rw-r--r-- | drivers/scsi/ufs/ufshcd.h | 10 |
2 files changed, 149 insertions, 90 deletions
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 54623104f262..0c2877251251 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c | |||
@@ -84,6 +84,14 @@ enum { | |||
84 | UFSHCD_EH_IN_PROGRESS = (1 << 0), | 84 | UFSHCD_EH_IN_PROGRESS = (1 << 0), |
85 | }; | 85 | }; |
86 | 86 | ||
87 | /* UFSHCD UIC layer error flags */ | ||
88 | enum { | ||
89 | UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */ | ||
90 | UFSHCD_UIC_NL_ERROR = (1 << 1), /* Network layer error */ | ||
91 | UFSHCD_UIC_TL_ERROR = (1 << 2), /* Transport Layer error */ | ||
92 | UFSHCD_UIC_DME_ERROR = (1 << 3), /* DME error */ | ||
93 | }; | ||
94 | |||
87 | /* Interrupt configuration options */ | 95 | /* Interrupt configuration options */ |
88 | enum { | 96 | enum { |
89 | UFSHCD_INT_DISABLE, | 97 | UFSHCD_INT_DISABLE, |
@@ -100,6 +108,8 @@ enum { | |||
100 | 108 | ||
101 | static void ufshcd_tmc_handler(struct ufs_hba *hba); | 109 | static void ufshcd_tmc_handler(struct ufs_hba *hba); |
102 | static void ufshcd_async_scan(void *data, async_cookie_t cookie); | 110 | static void ufshcd_async_scan(void *data, async_cookie_t cookie); |
111 | static int ufshcd_reset_and_restore(struct ufs_hba *hba); | ||
112 | static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag); | ||
103 | 113 | ||
104 | /* | 114 | /* |
105 | * ufshcd_wait_for_register - wait for register value to change | 115 | * ufshcd_wait_for_register - wait for register value to change |
@@ -1735,9 +1745,6 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba) | |||
1735 | goto out; | 1745 | goto out; |
1736 | } | 1746 | } |
1737 | 1747 | ||
1738 | if (hba->ufshcd_state == UFSHCD_STATE_RESET) | ||
1739 | scsi_unblock_requests(hba->host); | ||
1740 | |||
1741 | out: | 1748 | out: |
1742 | return err; | 1749 | return err; |
1743 | } | 1750 | } |
@@ -1863,66 +1870,6 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba) | |||
1863 | } | 1870 | } |
1864 | 1871 | ||
1865 | /** | 1872 | /** |
1866 | * ufshcd_do_reset - reset the host controller | ||
1867 | * @hba: per adapter instance | ||
1868 | * | ||
1869 | * Returns SUCCESS/FAILED | ||
1870 | */ | ||
1871 | static int ufshcd_do_reset(struct ufs_hba *hba) | ||
1872 | { | ||
1873 | struct ufshcd_lrb *lrbp; | ||
1874 | unsigned long flags; | ||
1875 | int tag; | ||
1876 | |||
1877 | /* block commands from midlayer */ | ||
1878 | scsi_block_requests(hba->host); | ||
1879 | |||
1880 | spin_lock_irqsave(hba->host->host_lock, flags); | ||
1881 | hba->ufshcd_state = UFSHCD_STATE_RESET; | ||
1882 | |||
1883 | /* send controller to reset state */ | ||
1884 | ufshcd_hba_stop(hba); | ||
1885 | spin_unlock_irqrestore(hba->host->host_lock, flags); | ||
1886 | |||
1887 | /* abort outstanding commands */ | ||
1888 | for (tag = 0; tag < hba->nutrs; tag++) { | ||
1889 | if (test_bit(tag, &hba->outstanding_reqs)) { | ||
1890 | lrbp = &hba->lrb[tag]; | ||
1891 | if (lrbp->cmd) { | ||
1892 | scsi_dma_unmap(lrbp->cmd); | ||
1893 | lrbp->cmd->result = DID_RESET << 16; | ||
1894 | lrbp->cmd->scsi_done(lrbp->cmd); | ||
1895 | lrbp->cmd = NULL; | ||
1896 | clear_bit_unlock(tag, &hba->lrb_in_use); | ||
1897 | } | ||
1898 | } | ||
1899 | } | ||
1900 | |||
1901 | /* complete device management command */ | ||
1902 | if (hba->dev_cmd.complete) | ||
1903 | complete(hba->dev_cmd.complete); | ||
1904 | |||
1905 | /* clear outstanding request/task bit maps */ | ||
1906 | hba->outstanding_reqs = 0; | ||
1907 | hba->outstanding_tasks = 0; | ||
1908 | |||
1909 | /* Host controller enable */ | ||
1910 | if (ufshcd_hba_enable(hba)) { | ||
1911 | dev_err(hba->dev, | ||
1912 | "Reset: Controller initialization failed\n"); | ||
1913 | return FAILED; | ||
1914 | } | ||
1915 | |||
1916 | if (ufshcd_link_startup(hba)) { | ||
1917 | dev_err(hba->dev, | ||
1918 | "Reset: Link start-up failed\n"); | ||
1919 | return FAILED; | ||
1920 | } | ||
1921 | |||
1922 | return SUCCESS; | ||
1923 | } | ||
1924 | |||
1925 | /** | ||
1926 | * ufshcd_slave_alloc - handle initial SCSI device configurations | 1873 | * ufshcd_slave_alloc - handle initial SCSI device configurations |
1927 | * @sdev: pointer to SCSI device | 1874 | * @sdev: pointer to SCSI device |
1928 | * | 1875 | * |
@@ -1939,6 +1886,9 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev) | |||
1939 | sdev->use_10_for_ms = 1; | 1886 | sdev->use_10_for_ms = 1; |
1940 | scsi_set_tag_type(sdev, MSG_SIMPLE_TAG); | 1887 | scsi_set_tag_type(sdev, MSG_SIMPLE_TAG); |
1941 | 1888 | ||
1889 | /* allow SCSI layer to restart the device in case of errors */ | ||
1890 | sdev->allow_restart = 1; | ||
1891 | |||
1942 | /* | 1892 | /* |
1943 | * Inform SCSI Midlayer that the LUN queue depth is same as the | 1893 | * Inform SCSI Midlayer that the LUN queue depth is same as the |
1944 | * controller queue depth. If a LUN queue depth is less than the | 1894 | * controller queue depth. If a LUN queue depth is less than the |
@@ -2134,6 +2084,9 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) | |||
2134 | case OCS_ABORTED: | 2084 | case OCS_ABORTED: |
2135 | result |= DID_ABORT << 16; | 2085 | result |= DID_ABORT << 16; |
2136 | break; | 2086 | break; |
2087 | case OCS_INVALID_COMMAND_STATUS: | ||
2088 | result |= DID_REQUEUE << 16; | ||
2089 | break; | ||
2137 | case OCS_INVALID_CMD_TABLE_ATTR: | 2090 | case OCS_INVALID_CMD_TABLE_ATTR: |
2138 | case OCS_INVALID_PRDT_ATTR: | 2091 | case OCS_INVALID_PRDT_ATTR: |
2139 | case OCS_MISMATCH_DATA_BUF_SIZE: | 2092 | case OCS_MISMATCH_DATA_BUF_SIZE: |
@@ -2451,45 +2404,145 @@ out: | |||
2451 | } | 2404 | } |
2452 | 2405 | ||
2453 | /** | 2406 | /** |
2454 | * ufshcd_fatal_err_handler - handle fatal errors | 2407 | * ufshcd_err_handler - handle UFS errors that require s/w attention |
2455 | * @hba: per adapter instance | 2408 | * @work: pointer to work structure |
2456 | */ | 2409 | */ |
2457 | static void ufshcd_fatal_err_handler(struct work_struct *work) | 2410 | static void ufshcd_err_handler(struct work_struct *work) |
2458 | { | 2411 | { |
2459 | struct ufs_hba *hba; | 2412 | struct ufs_hba *hba; |
2460 | hba = container_of(work, struct ufs_hba, feh_workq); | 2413 | unsigned long flags; |
2414 | u32 err_xfer = 0; | ||
2415 | u32 err_tm = 0; | ||
2416 | int err = 0; | ||
2417 | int tag; | ||
2418 | |||
2419 | hba = container_of(work, struct ufs_hba, eh_work); | ||
2461 | 2420 | ||
2462 | pm_runtime_get_sync(hba->dev); | 2421 | pm_runtime_get_sync(hba->dev); |
2463 | /* check if reset is already in progress */ | 2422 | |
2464 | if (hba->ufshcd_state != UFSHCD_STATE_RESET) | 2423 | spin_lock_irqsave(hba->host->host_lock, flags); |
2465 | ufshcd_do_reset(hba); | 2424 | if (hba->ufshcd_state == UFSHCD_STATE_RESET) { |
2425 | spin_unlock_irqrestore(hba->host->host_lock, flags); | ||
2426 | goto out; | ||
2427 | } | ||
2428 | |||
2429 | hba->ufshcd_state = UFSHCD_STATE_RESET; | ||
2430 | ufshcd_set_eh_in_progress(hba); | ||
2431 | |||
2432 | /* Complete requests that have door-bell cleared by h/w */ | ||
2433 | ufshcd_transfer_req_compl(hba); | ||
2434 | ufshcd_tmc_handler(hba); | ||
2435 | spin_unlock_irqrestore(hba->host->host_lock, flags); | ||
2436 | |||
2437 | /* Clear pending transfer requests */ | ||
2438 | for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) | ||
2439 | if (ufshcd_clear_cmd(hba, tag)) | ||
2440 | err_xfer |= 1 << tag; | ||
2441 | |||
2442 | /* Clear pending task management requests */ | ||
2443 | for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) | ||
2444 | if (ufshcd_clear_tm_cmd(hba, tag)) | ||
2445 | err_tm |= 1 << tag; | ||
2446 | |||
2447 | /* Complete the requests that are cleared by s/w */ | ||
2448 | spin_lock_irqsave(hba->host->host_lock, flags); | ||
2449 | ufshcd_transfer_req_compl(hba); | ||
2450 | ufshcd_tmc_handler(hba); | ||
2451 | spin_unlock_irqrestore(hba->host->host_lock, flags); | ||
2452 | |||
2453 | /* Fatal errors need reset */ | ||
2454 | if (err_xfer || err_tm || (hba->saved_err & INT_FATAL_ERRORS) || | ||
2455 | ((hba->saved_err & UIC_ERROR) && | ||
2456 | (hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR))) { | ||
2457 | err = ufshcd_reset_and_restore(hba); | ||
2458 | if (err) { | ||
2459 | dev_err(hba->dev, "%s: reset and restore failed\n", | ||
2460 | __func__); | ||
2461 | hba->ufshcd_state = UFSHCD_STATE_ERROR; | ||
2462 | } | ||
2463 | /* | ||
2464 | * Inform scsi mid-layer that we did reset and allow to handle | ||
2465 | * Unit Attention properly. | ||
2466 | */ | ||
2467 | scsi_report_bus_reset(hba->host, 0); | ||
2468 | hba->saved_err = 0; | ||
2469 | hba->saved_uic_err = 0; | ||
2470 | } | ||
2471 | ufshcd_clear_eh_in_progress(hba); | ||
2472 | |||
2473 | out: | ||
2474 | scsi_unblock_requests(hba->host); | ||
2466 | pm_runtime_put_sync(hba->dev); | 2475 | pm_runtime_put_sync(hba->dev); |
2467 | } | 2476 | } |
2468 | 2477 | ||
2469 | /** | 2478 | /** |
2470 | * ufshcd_err_handler - Check for fatal errors | 2479 | * ufshcd_update_uic_error - check and set fatal UIC error flags. |
2471 | * @work: pointer to a work queue structure | 2480 | * @hba: per-adapter instance |
2472 | */ | 2481 | */ |
2473 | static void ufshcd_err_handler(struct ufs_hba *hba) | 2482 | static void ufshcd_update_uic_error(struct ufs_hba *hba) |
2474 | { | 2483 | { |
2475 | u32 reg; | 2484 | u32 reg; |
2476 | 2485 | ||
2486 | /* PA_INIT_ERROR is fatal and needs UIC reset */ | ||
2487 | reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER); | ||
2488 | if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT) | ||
2489 | hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR; | ||
2490 | |||
2491 | /* UIC NL/TL/DME errors needs software retry */ | ||
2492 | reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER); | ||
2493 | if (reg) | ||
2494 | hba->uic_error |= UFSHCD_UIC_NL_ERROR; | ||
2495 | |||
2496 | reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_TRANSPORT_LAYER); | ||
2497 | if (reg) | ||
2498 | hba->uic_error |= UFSHCD_UIC_TL_ERROR; | ||
2499 | |||
2500 | reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DME); | ||
2501 | if (reg) | ||
2502 | hba->uic_error |= UFSHCD_UIC_DME_ERROR; | ||
2503 | |||
2504 | dev_dbg(hba->dev, "%s: UIC error flags = 0x%08x\n", | ||
2505 | __func__, hba->uic_error); | ||
2506 | } | ||
2507 | |||
2508 | /** | ||
2509 | * ufshcd_check_errors - Check for errors that need s/w attention | ||
2510 | * @hba: per-adapter instance | ||
2511 | */ | ||
2512 | static void ufshcd_check_errors(struct ufs_hba *hba) | ||
2513 | { | ||
2514 | bool queue_eh_work = false; | ||
2515 | |||
2477 | if (hba->errors & INT_FATAL_ERRORS) | 2516 | if (hba->errors & INT_FATAL_ERRORS) |
2478 | goto fatal_eh; | 2517 | queue_eh_work = true; |
2479 | 2518 | ||
2480 | if (hba->errors & UIC_ERROR) { | 2519 | if (hba->errors & UIC_ERROR) { |
2481 | reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER); | 2520 | hba->uic_error = 0; |
2482 | if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT) | 2521 | ufshcd_update_uic_error(hba); |
2483 | goto fatal_eh; | 2522 | if (hba->uic_error) |
2523 | queue_eh_work = true; | ||
2484 | } | 2524 | } |
2485 | return; | 2525 | |
2486 | fatal_eh: | 2526 | if (queue_eh_work) { |
2487 | /* handle fatal errors only when link is functional */ | 2527 | /* handle fatal errors only when link is functional */ |
2488 | if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) { | 2528 | if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) { |
2489 | /* block commands at driver layer until error is handled */ | 2529 | /* block commands from scsi mid-layer */ |
2490 | hba->ufshcd_state = UFSHCD_STATE_ERROR; | 2530 | scsi_block_requests(hba->host); |
2491 | schedule_work(&hba->feh_workq); | 2531 | |
2532 | /* transfer error masks to sticky bits */ | ||
2533 | hba->saved_err |= hba->errors; | ||
2534 | hba->saved_uic_err |= hba->uic_error; | ||
2535 | |||
2536 | hba->ufshcd_state = UFSHCD_STATE_ERROR; | ||
2537 | schedule_work(&hba->eh_work); | ||
2538 | } | ||
2492 | } | 2539 | } |
2540 | /* | ||
2541 | * if (!queue_eh_work) - | ||
2542 | * Other errors are either non-fatal where host recovers | ||
2543 | * itself without s/w intervention or errors that will be | ||
2544 | * handled by the SCSI core layer. | ||
2545 | */ | ||
2493 | } | 2546 | } |
2494 | 2547 | ||
2495 | /** | 2548 | /** |
@@ -2514,7 +2567,7 @@ static void ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status) | |||
2514 | { | 2567 | { |
2515 | hba->errors = UFSHCD_ERROR_MASK & intr_status; | 2568 | hba->errors = UFSHCD_ERROR_MASK & intr_status; |
2516 | if (hba->errors) | 2569 | if (hba->errors) |
2517 | ufshcd_err_handler(hba); | 2570 | ufshcd_check_errors(hba); |
2518 | 2571 | ||
2519 | if (intr_status & UFSHCD_UIC_MASK) | 2572 | if (intr_status & UFSHCD_UIC_MASK) |
2520 | ufshcd_uic_cmd_compl(hba, intr_status); | 2573 | ufshcd_uic_cmd_compl(hba, intr_status); |
@@ -2889,12 +2942,12 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd) | |||
2889 | */ | 2942 | */ |
2890 | do { | 2943 | do { |
2891 | spin_lock_irqsave(hba->host->host_lock, flags); | 2944 | spin_lock_irqsave(hba->host->host_lock, flags); |
2892 | if (!(work_pending(&hba->feh_workq) || | 2945 | if (!(work_pending(&hba->eh_work) || |
2893 | hba->ufshcd_state == UFSHCD_STATE_RESET)) | 2946 | hba->ufshcd_state == UFSHCD_STATE_RESET)) |
2894 | break; | 2947 | break; |
2895 | spin_unlock_irqrestore(hba->host->host_lock, flags); | 2948 | spin_unlock_irqrestore(hba->host->host_lock, flags); |
2896 | dev_dbg(hba->dev, "%s: reset in progress\n", __func__); | 2949 | dev_dbg(hba->dev, "%s: reset in progress\n", __func__); |
2897 | flush_work(&hba->feh_workq); | 2950 | flush_work(&hba->eh_work); |
2898 | } while (1); | 2951 | } while (1); |
2899 | 2952 | ||
2900 | hba->ufshcd_state = UFSHCD_STATE_RESET; | 2953 | hba->ufshcd_state = UFSHCD_STATE_RESET; |
@@ -3130,7 +3183,7 @@ int ufshcd_init(struct device *dev, struct ufs_hba **hba_handle, | |||
3130 | init_waitqueue_head(&hba->tm_tag_wq); | 3183 | init_waitqueue_head(&hba->tm_tag_wq); |
3131 | 3184 | ||
3132 | /* Initialize work queues */ | 3185 | /* Initialize work queues */ |
3133 | INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler); | 3186 | INIT_WORK(&hba->eh_work, ufshcd_err_handler); |
3134 | INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler); | 3187 | INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler); |
3135 | 3188 | ||
3136 | /* Initialize UIC command mutex */ | 3189 | /* Initialize UIC command mutex */ |
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 48c7d9b587f7..acf318e338ed 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h | |||
@@ -183,9 +183,12 @@ struct ufs_dev_cmd { | |||
183 | * @eh_flags: Error handling flags | 183 | * @eh_flags: Error handling flags |
184 | * @intr_mask: Interrupt Mask Bits | 184 | * @intr_mask: Interrupt Mask Bits |
185 | * @ee_ctrl_mask: Exception event control mask | 185 | * @ee_ctrl_mask: Exception event control mask |
186 | * @feh_workq: Work queue for fatal controller error handling | 186 | * @eh_work: Worker to handle UFS errors that require s/w attention |
187 | * @eeh_work: Worker to handle exception events | 187 | * @eeh_work: Worker to handle exception events |
188 | * @errors: HBA errors | 188 | * @errors: HBA errors |
189 | * @uic_error: UFS interconnect layer error status | ||
190 | * @saved_err: sticky error mask | ||
191 | * @saved_uic_err: sticky UIC error mask | ||
189 | * @dev_cmd: ufs device management command information | 192 | * @dev_cmd: ufs device management command information |
190 | * @auto_bkops_enabled: to track whether bkops is enabled in device | 193 | * @auto_bkops_enabled: to track whether bkops is enabled in device |
191 | */ | 194 | */ |
@@ -233,11 +236,14 @@ struct ufs_hba { | |||
233 | u16 ee_ctrl_mask; | 236 | u16 ee_ctrl_mask; |
234 | 237 | ||
235 | /* Work Queues */ | 238 | /* Work Queues */ |
236 | struct work_struct feh_workq; | 239 | struct work_struct eh_work; |
237 | struct work_struct eeh_work; | 240 | struct work_struct eeh_work; |
238 | 241 | ||
239 | /* HBA Errors */ | 242 | /* HBA Errors */ |
240 | u32 errors; | 243 | u32 errors; |
244 | u32 uic_error; | ||
245 | u32 saved_err; | ||
246 | u32 saved_uic_err; | ||
241 | 247 | ||
242 | /* Device management request data */ | 248 | /* Device management request data */ |
243 | struct ufs_dev_cmd dev_cmd; | 249 | struct ufs_dev_cmd dev_cmd; |