about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/block/cciss_scsi.c
diff options
context:
space:
mode:
author: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net> 2009-06-02 08:48:11 -0400
committer: Jens Axboe <jens.axboe@oracle.com> 2009-06-02 08:48:11 -0400
commit: 88f627ae394eadd75ada669904269f1a4a77b3bd (patch)
tree: 679356d2253914ae93132c3d0116111e63284cba /drivers/block/cciss_scsi.c
parent: 4a4b2d7684c66dbd8ed04eb284bc94a78e061d29 (diff)
cciss: fix SCSI device reset handler
Fix the SCSI reset error handler to send a working, properly addressed reset message to the target device, and add code to wait for the target device to become ready by polling it with Test Unit Ready. The existing reset code was broken in that it didn't bother to set the 8-byte LUN address to anything besides zero, so the command was addressed to the controller, which pretended to the driver that the command succeeded, while doing nothing. Ages ago I tested this code, but unbeknownst to me, my test was flawed, and what I thought was a tape drive getting reset was actually nothing of the sort. Unfortunately, there is still lots of Smart Array firmware that doesn't handle target resets correctly, and this code won't help in those cases, but it also shouldn't make things worse in those cases than they already are.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Cc: Mike Miller <mikem@beardog.cca.cpqcorp.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'drivers/block/cciss_scsi.c')
-rw-r--r-- drivers/block/cciss_scsi.c 85
1 files changed, 79 insertions, 6 deletions
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index a3fd87b41444..8575c48c8917 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -58,6 +58,18 @@ static int sendcmd(
58 unsigned char *scsi3addr, 58 unsigned char *scsi3addr,
59 int cmd_type); 59 int cmd_type);
60 60
61static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
62 size_t size,
63 unsigned int use_unit_num, /* 0: address the controller,
64 1: address logical volume log_unit,
65 2: periph device address is scsi3addr */
66 unsigned int log_unit, __u8 page_code, unsigned char *scsi3addr,
67 int cmd_type);
68
69static int sendcmd_core(ctlr_info_t *h, CommandList_struct *c);
70
71static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool);
72static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool);
61 73
62static int cciss_scsi_proc_info( 74static int cciss_scsi_proc_info(
63 struct Scsi_Host *sh, 75 struct Scsi_Host *sh,
@@ -1575,6 +1587,68 @@ cciss_seq_tape_report(struct seq_file *seq, int ctlr)
1575 CPQ_TAPE_UNLOCK(ctlr, flags); 1587 CPQ_TAPE_UNLOCK(ctlr, flags);
1576} 1588}
1577 1589
1590static int wait_for_device_to_become_ready(ctlr_info_t *h,
1591 unsigned char lunaddr[])
1592{
1593 int rc;
1594 int count = 0;
1595 int waittime = HZ;
1596 CommandList_struct *c;
1597
1598 c = cmd_alloc(h, 1);
1599 if (!c) {
1600 printk(KERN_WARNING "cciss%d: out of memory in "
1601 "wait_for_device_to_become_ready.\n", h->ctlr);
1602 return IO_ERROR;
1603 }
1604
1605 /* Send test unit ready until device ready, or give up. */
1606 while (count < 20) {
1607
1608 /* Wait for a bit. do this first, because if we send
1609 * the TUR right away, the reset will just abort it.
1610 */
1611 set_current_state(TASK_INTERRUPTIBLE);
1612 schedule_timeout(waittime);
1613 count++;
1614
1615 /* Increase wait time with each try, up to a point. */
1616 if (waittime < (HZ * 30))
1617 waittime = waittime * 2;
1618
1619 /* Send the Test Unit Ready */
1620 rc = fill_cmd(c, TEST_UNIT_READY, h->ctlr, NULL, 0, 0, 0, 0,
1621 lunaddr, TYPE_CMD);
1622 if (rc == 0) {
1623 rc = sendcmd_core(h, c);
1624 /* sendcmd turned off interrupts, turn 'em back on. */
1625 h->access.set_intr_mask(h, CCISS_INTR_ON);
1626 }
1627
1628 if (rc == 0 && c->err_info->CommandStatus == CMD_SUCCESS)
1629 break;
1630
1631 if (rc == 0 &&
1632 c->err_info->CommandStatus == CMD_TARGET_STATUS &&
1633 c->err_info->ScsiStatus == SAM_STAT_CHECK_CONDITION &&
1634 (c->err_info->SenseInfo[2] == NO_SENSE ||
1635 c->err_info->SenseInfo[2] == UNIT_ATTENTION))
1636 break;
1637
1638 printk(KERN_WARNING "cciss%d: Waiting %d secs "
1639 "for device to become ready.\n",
1640 h->ctlr, waittime / HZ);
1641 rc = 1; /* device not ready. */
1642 }
1643
1644 if (rc)
1645 printk("cciss%d: giving up on device.\n", h->ctlr);
1646 else
1647 printk(KERN_WARNING "cciss%d: device is ready.\n", h->ctlr);
1648
1649 cmd_free(h, c, 1);
1650 return rc;
1651}
1578 1652
1579/* Need at least one of these error handlers to keep ../scsi/hosts.c from 1653/* Need at least one of these error handlers to keep ../scsi/hosts.c from
1580 * complaining. Doing a host- or bus-reset can't do anything good here. 1654 * complaining. Doing a host- or bus-reset can't do anything good here.
@@ -1591,6 +1665,7 @@ static int cciss_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
1591{ 1665{
1592 int rc; 1666 int rc;
1593 CommandList_struct *cmd_in_trouble; 1667 CommandList_struct *cmd_in_trouble;
1668 unsigned char lunaddr[8];
1594 ctlr_info_t **c; 1669 ctlr_info_t **c;
1595 int ctlr; 1670 int ctlr;
1596 1671
@@ -1600,19 +1675,17 @@ static int cciss_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
1600 return FAILED; 1675 return FAILED;
1601 ctlr = (*c)->ctlr; 1676 ctlr = (*c)->ctlr;
1602 printk(KERN_WARNING "cciss%d: resetting tape drive or medium changer.\n", ctlr); 1677 printk(KERN_WARNING "cciss%d: resetting tape drive or medium changer.\n", ctlr);
1603
1604 /* find the command that's giving us trouble */ 1678 /* find the command that's giving us trouble */
1605 cmd_in_trouble = (CommandList_struct *) scsicmd->host_scribble; 1679 cmd_in_trouble = (CommandList_struct *) scsicmd->host_scribble;
1606 if (cmd_in_trouble == NULL) { /* paranoia */ 1680 if (cmd_in_trouble == NULL) /* paranoia */
1607 return FAILED; 1681 return FAILED;
1608 } 1682 memcpy(lunaddr, &cmd_in_trouble->Header.LUN.LunAddrBytes[0], 8);
1609 /* send a reset to the SCSI LUN which the command was sent to */ 1683 /* send a reset to the SCSI LUN which the command was sent to */
1610 rc = sendcmd(CCISS_RESET_MSG, ctlr, NULL, 0, 2, 0, 0, 1684 rc = sendcmd(CCISS_RESET_MSG, ctlr, NULL, 0, 2, 0, 0, lunaddr,
1611 (unsigned char *) &cmd_in_trouble->Header.LUN.LunAddrBytes[0],
1612 TYPE_MSG); 1685 TYPE_MSG);
1613 /* sendcmd turned off interrupts on the board, turn 'em back on. */ 1686 /* sendcmd turned off interrupts on the board, turn 'em back on. */
1614 (*c)->access.set_intr_mask(*c, CCISS_INTR_ON); 1687 (*c)->access.set_intr_mask(*c, CCISS_INTR_ON);
1615 if (rc == 0) 1688 if (rc == 0 && wait_for_device_to_become_ready(*c, lunaddr) == 0)
1616 return SUCCESS; 1689 return SUCCESS;
1617 printk(KERN_WARNING "cciss%d: resetting device failed.\n", ctlr); 1690 printk(KERN_WARNING "cciss%d: resetting device failed.\n", ctlr);
1618 return FAILED; 1691 return FAILED;