aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/mmc/card
diff options
context:
space:
mode:
authorRussell King - ARM Linux <linux@arm.linux.org.uk>2011-06-20 15:10:28 -0400
committerChris Ball <cjb@laptop.org>2011-07-20 17:20:54 -0400
commita01f3ccf845067de32189f8a8e85d22c381f93b9 (patch)
treed5d5fedaadaae9abb435cfae1ee0c4d0e4ea036c /drivers/mmc/card
parent0a2d4048a22079d7e79d6654bbacbef57bd5728a (diff)
mmc: block: improve error recovery from command channel errors
Command channel errors fall into four classes: 1. The command was issued with the card in the wrong state 2. The command failed to be received by the card correctly 3. The card's response failed to be received by the host (CRC error) 4. The card failed to respond to the command. For (1), in theory we should know that the card is in the correct state. However, a failed stop command (or other failure) may result in the card remaining in a data transfer state from the previous command. If we detect this condition, we try to recover by sending a stop command. For the initial commands (set block count and the read/write command) no data will have been transferred. All that we need to deal with is retrying at this point. A failed stop command can be remedied as above. If we are unable to recover the card (e.g., the card ignores our requests for status, or we don't recognise the error code) then we immediately fail the request. Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> Acked-by: Linus Walleij <linus.walleij@linaro.org> Tested-by: Pawel Moll <pawel.moll@arm.com> Signed-off-by: Chris Ball <cjb@laptop.org>
Diffstat (limited to 'drivers/mmc/card')
-rw-r--r--drivers/mmc/card/block.c230
1 files changed, 182 insertions, 48 deletions
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 3200e2ca6a9d..ff347319ff80 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -525,6 +525,19 @@ static u32 mmc_sd_num_wr_blocks(struct mmc_card *card)
525 return result; 525 return result;
526} 526}
527 527
528static int send_stop(struct mmc_card *card, u32 *status)
529{
530 struct mmc_command cmd = {0};
531 int err;
532
533 cmd.opcode = MMC_STOP_TRANSMISSION;
534 cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
535 err = mmc_wait_for_cmd(card->host, &cmd, 5);
536 if (err == 0)
537 *status = cmd.resp[0];
538 return err;
539}
540
528static int get_card_status(struct mmc_card *card, u32 *status, int retries) 541static int get_card_status(struct mmc_card *card, u32 *status, int retries)
529{ 542{
530 struct mmc_command cmd = {0}; 543 struct mmc_command cmd = {0};
@@ -540,6 +553,137 @@ static int get_card_status(struct mmc_card *card, u32 *status, int retries)
540 return err; 553 return err;
541} 554}
542 555
556#define ERR_RETRY 2
557#define ERR_ABORT 1
558#define ERR_CONTINUE 0
559
560static int mmc_blk_cmd_error(struct request *req, const char *name, int error,
561 bool status_valid, u32 status)
562{
563 switch (error) {
564 case -EILSEQ:
565 /* response crc error, retry the r/w cmd */
566 pr_err("%s: %s sending %s command, card status %#x\n",
567 req->rq_disk->disk_name, "response CRC error",
568 name, status);
569 return ERR_RETRY;
570
571 case -ETIMEDOUT:
572 pr_err("%s: %s sending %s command, card status %#x\n",
573 req->rq_disk->disk_name, "timed out", name, status);
574
575 /* If the status cmd initially failed, retry the r/w cmd */
576 if (!status_valid)
577 return ERR_RETRY;
578
579 /*
580 * If it was a r/w cmd crc error, or illegal command
581 * (eg, issued in wrong state) then retry - we should
582 * have corrected the state problem above.
583 */
584 if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND))
585 return ERR_RETRY;
586
587 /* Otherwise abort the command */
588 return ERR_ABORT;
589
590 default:
591 /* We don't understand the error code the driver gave us */
592 pr_err("%s: unknown error %d sending read/write command, card status %#x\n",
593 req->rq_disk->disk_name, error, status);
594 return ERR_ABORT;
595 }
596}
597
598/*
599 * Initial r/w and stop cmd error recovery.
600 * We don't know whether the card received the r/w cmd or not, so try to
601 * restore things back to a sane state. Essentially, we do this as follows:
602 * - Obtain card status. If the first attempt to obtain card status fails,
603 * the status word will reflect the failed status cmd, not the failed
604 * r/w cmd. If we fail to obtain card status, it suggests we can no
605 * longer communicate with the card.
606 * - Check the card state. If the card received the cmd but there was a
607 * transient problem with the response, it might still be in a data transfer
608 * mode. Try to send it a stop command. If this fails, we can't recover.
609 * - If the r/w cmd failed due to a response CRC error, it was probably
610 * transient, so retry the cmd.
611 * - If the r/w cmd timed out, but we didn't get the r/w cmd status, retry.
612 * - If the r/w cmd timed out, and the r/w cmd failed due to CRC error or
613 * illegal cmd, retry.
614 * Otherwise we don't understand what happened, so abort.
615 */
616static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
617 struct mmc_blk_request *brq)
618{
619 bool prev_cmd_status_valid = true;
620 u32 status, stop_status = 0;
621 int err, retry;
622
623 /*
624 * Try to get card status which indicates both the card state
625 * and why there was no response. If the first attempt fails,
626 * we can't be sure the returned status is for the r/w command.
627 */
628 for (retry = 2; retry >= 0; retry--) {
629 err = get_card_status(card, &status, 0);
630 if (!err)
631 break;
632
633 prev_cmd_status_valid = false;
634 pr_err("%s: error %d sending status command, %sing\n",
635 req->rq_disk->disk_name, err, retry ? "retry" : "abort");
636 }
637
638 /* We couldn't get a response from the card. Give up. */
639 if (err)
640 return ERR_ABORT;
641
642 /*
643 * Check the current card state. If it is in some data transfer
644 * mode, tell it to stop (and hopefully transition back to TRAN.)
645 */
646 if (R1_CURRENT_STATE(status) == R1_STATE_DATA ||
647 R1_CURRENT_STATE(status) == R1_STATE_RCV) {
648 err = send_stop(card, &stop_status);
649 if (err)
650 pr_err("%s: error %d sending stop command\n",
651 req->rq_disk->disk_name, err);
652
653 /*
654 * If the stop cmd also timed out, the card is probably
655 * not present, so abort. Other errors are bad news too.
656 */
657 if (err)
658 return ERR_ABORT;
659 }
660
661 /* Check for set block count errors */
662 if (brq->sbc.error)
663 return mmc_blk_cmd_error(req, "SET_BLOCK_COUNT", brq->sbc.error,
664 prev_cmd_status_valid, status);
665
666 /* Check for r/w command errors */
667 if (brq->cmd.error)
668 return mmc_blk_cmd_error(req, "r/w cmd", brq->cmd.error,
669 prev_cmd_status_valid, status);
670
671 /* Now for stop errors. These aren't fatal to the transfer. */
672 pr_err("%s: error %d sending stop command, original cmd response %#x, card status %#x\n",
673 req->rq_disk->disk_name, brq->stop.error,
674 brq->cmd.resp[0], status);
675
676 /*
677 * Subsitute in our own stop status as this will give the error
678 * state which happened during the execution of the r/w command.
679 */
680 if (stop_status) {
681 brq->stop.resp[0] = stop_status;
682 brq->stop.error = 0;
683 }
684 return ERR_CONTINUE;
685}
686
543static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) 687static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
544{ 688{
545 struct mmc_blk_data *md = mq->data; 689 struct mmc_blk_data *md = mq->data;
@@ -673,7 +817,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
673 struct mmc_blk_data *md = mq->data; 817 struct mmc_blk_data *md = mq->data;
674 struct mmc_card *card = md->queue.card; 818 struct mmc_card *card = md->queue.card;
675 struct mmc_blk_request brq; 819 struct mmc_blk_request brq;
676 int ret = 1, disable_multi = 0; 820 int ret = 1, disable_multi = 0, retry = 0;
677 821
678 /* 822 /*
679 * Reliable writes are used to implement Forced Unit Access and 823 * Reliable writes are used to implement Forced Unit Access and
@@ -685,7 +829,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
685 (md->flags & MMC_BLK_REL_WR); 829 (md->flags & MMC_BLK_REL_WR);
686 830
687 do { 831 do {
688 u32 readcmd, writecmd, status = 0; 832 u32 readcmd, writecmd;
689 833
690 memset(&brq, 0, sizeof(struct mmc_blk_request)); 834 memset(&brq, 0, sizeof(struct mmc_blk_request));
691 brq.mrq.cmd = &brq.cmd; 835 brq.mrq.cmd = &brq.cmd;
@@ -802,55 +946,29 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
802 mmc_queue_bounce_post(mq); 946 mmc_queue_bounce_post(mq);
803 947
804 /* 948 /*
805 * Check for errors here, but don't jump to cmd_err 949 * sbc.error indicates a problem with the set block count
806 * until later as we need to wait for the card to leave 950 * command. No data will have been transferred.
807 * programming mode even when things go wrong. 951 *
952 * cmd.error indicates a problem with the r/w command. No
953 * data will have been transferred.
954 *
955 * stop.error indicates a problem with the stop command. Data
956 * may have been transferred, or may still be transferring.
808 */ 957 */
809 if (brq.sbc.error || brq.cmd.error || 958 if (brq.sbc.error || brq.cmd.error || brq.stop.error) {
810 brq.data.error || brq.stop.error) { 959 switch (mmc_blk_cmd_recovery(card, req, &brq)) {
811 if (brq.data.blocks > 1 && rq_data_dir(req) == READ) { 960 case ERR_RETRY:
812 /* Redo read one sector at a time */ 961 if (retry++ < 5)
813 printk(KERN_WARNING "%s: retrying using single " 962 continue;
814 "block read\n", req->rq_disk->disk_name); 963 case ERR_ABORT:
815 disable_multi = 1; 964 goto cmd_abort;
816 continue; 965 case ERR_CONTINUE:
966 break;
817 } 967 }
818 get_card_status(card, &status, 0);
819 }
820
821 if (brq.sbc.error) {
822 printk(KERN_ERR "%s: error %d sending SET_BLOCK_COUNT "
823 "command, response %#x, card status %#x\n",
824 req->rq_disk->disk_name, brq.sbc.error,
825 brq.sbc.resp[0], status);
826 }
827
828 if (brq.cmd.error) {
829 printk(KERN_ERR "%s: error %d sending read/write "
830 "command, response %#x, card status %#x\n",
831 req->rq_disk->disk_name, brq.cmd.error,
832 brq.cmd.resp[0], status);
833 }
834
835 if (brq.data.error) {
836 if (brq.data.error == -ETIMEDOUT && brq.mrq.stop)
837 /* 'Stop' response contains card status */
838 status = brq.mrq.stop->resp[0];
839 printk(KERN_ERR "%s: error %d transferring data,"
840 " sector %u, nr %u, card status %#x\n",
841 req->rq_disk->disk_name, brq.data.error,
842 (unsigned)blk_rq_pos(req),
843 (unsigned)blk_rq_sectors(req), status);
844 }
845
846 if (brq.stop.error) {
847 printk(KERN_ERR "%s: error %d sending stop command, "
848 "response %#x, card status %#x\n",
849 req->rq_disk->disk_name, brq.stop.error,
850 brq.stop.resp[0], status);
851 } 968 }
852 969
853 if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) { 970 if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
971 u32 status;
854 do { 972 do {
855 int err = get_card_status(card, &status, 5); 973 int err = get_card_status(card, &status, 5);
856 if (err) { 974 if (err) {
@@ -867,8 +985,22 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
867 (R1_CURRENT_STATE(status) == R1_STATE_PRG)); 985 (R1_CURRENT_STATE(status) == R1_STATE_PRG));
868 } 986 }
869 987
870 if (brq.cmd.error || brq.stop.error || brq.data.error) { 988 if (brq.data.error) {
989 pr_err("%s: error %d transferring data, sector %u nr %u, cmd response %#x card status %#x\n",
990 req->rq_disk->disk_name, brq.data.error,
991 (unsigned)blk_rq_pos(req),
992 (unsigned)blk_rq_sectors(req),
993 brq.cmd.resp[0], brq.stop.resp[0]);
994
871 if (rq_data_dir(req) == READ) { 995 if (rq_data_dir(req) == READ) {
996 if (brq.data.blocks > 1) {
997 /* Redo read one sector at a time */
998 pr_warning("%s: retrying using single block read\n",
999 req->rq_disk->disk_name);
1000 disable_multi = 1;
1001 continue;
1002 }
1003
872 /* 1004 /*
873 * After an error, we redo I/O one sector at a 1005 * After an error, we redo I/O one sector at a
874 * time, so we only reach here after trying to 1006 * time, so we only reach here after trying to
@@ -878,8 +1010,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
878 ret = __blk_end_request(req, -EIO, brq.data.blksz); 1010 ret = __blk_end_request(req, -EIO, brq.data.blksz);
879 spin_unlock_irq(&md->lock); 1011 spin_unlock_irq(&md->lock);
880 continue; 1012 continue;
1013 } else {
1014 goto cmd_err;
881 } 1015 }
882 goto cmd_err;
883 } 1016 }
884 1017
885 /* 1018 /*
@@ -916,6 +1049,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
916 spin_unlock_irq(&md->lock); 1049 spin_unlock_irq(&md->lock);
917 } 1050 }
918 1051
1052 cmd_abort:
919 spin_lock_irq(&md->lock); 1053 spin_lock_irq(&md->lock);
920 while (ret) 1054 while (ret)
921 ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req)); 1055 ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req));