aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi/isci/task.c
diff options
context:
space:
mode:
authorJeff Skirvin <jeffrey.d.skirvin@intel.com>2011-06-21 15:16:33 -0400
committerDan Williams <dan.j.williams@intel.com>2011-07-03 07:04:50 -0400
commit61aaff49e20fdb700f1300a49962bc76effc77fc (patch)
treee00556108ba35fe5610673d1f10cde3440e4ed12 /drivers/scsi/isci/task.c
parentff717ab05f0c33f93514eccea6dfe1a15983e1d1 (diff)
isci: filter broadcast change notifications during SMP phy resets
When resetting a sata device in the domain we have seen occasions where libsas prematurely marks a device gone in the time it takes for the device to re-establish the link. This plays badly with software raid arrays. Other libsas drivers have non-uniform delays in their reset handlers to try to cover this condition, but not sufficient to close the hole. Given that a sata device can take many seconds to recover we filter bcns and poll for the device reattach state before notifying libsas that the port needs the domain to be rediscovered. Once this has been proven out at the lldd level we can think about uplevelling this feature to a common implementation in libsas. Signed-off-by: Jeff Skirvin <jeffrey.d.skirvin@intel.com> [ use kzalloc instead of kmem_cache ] Signed-off-by: Dave Jiang <dave.jiang@intel.com> [ use eventq and time macros ] Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers/scsi/isci/task.c')
-rw-r--r--drivers/scsi/isci/task.c259
1 files changed, 258 insertions, 1 deletions
diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c
index 69f17b98e4fa..709c08171743 100644
--- a/drivers/scsi/isci/task.c
+++ b/drivers/scsi/isci/task.c
@@ -56,6 +56,7 @@
56#include <linux/completion.h> 56#include <linux/completion.h>
57#include <linux/irqflags.h> 57#include <linux/irqflags.h>
58#include "sas.h" 58#include "sas.h"
59#include <scsi/libsas.h>
59#include "remote_device.h" 60#include "remote_device.h"
60#include "remote_node_context.h" 61#include "remote_node_context.h"
61#include "isci.h" 62#include "isci.h"
@@ -1397,11 +1398,250 @@ isci_task_request_complete(struct isci_host *ihost,
1397 complete(tmf_complete); 1398 complete(tmf_complete);
1398} 1399}
1399 1400
1401static void isci_smp_task_timedout(unsigned long _task)
1402{
1403 struct sas_task *task = (void *) _task;
1404 unsigned long flags;
1405
1406 spin_lock_irqsave(&task->task_state_lock, flags);
1407 if (!(task->task_state_flags & SAS_TASK_STATE_DONE))
1408 task->task_state_flags |= SAS_TASK_STATE_ABORTED;
1409 spin_unlock_irqrestore(&task->task_state_lock, flags);
1410
1411 complete(&task->completion);
1412}
1413
1414static void isci_smp_task_done(struct sas_task *task)
1415{
1416 if (!del_timer(&task->timer))
1417 return;
1418 complete(&task->completion);
1419}
1420
1421static struct sas_task *isci_alloc_task(void)
1422{
1423 struct sas_task *task = kzalloc(sizeof(*task), GFP_KERNEL);
1424
1425 if (task) {
1426 INIT_LIST_HEAD(&task->list);
1427 spin_lock_init(&task->task_state_lock);
1428 task->task_state_flags = SAS_TASK_STATE_PENDING;
1429 init_timer(&task->timer);
1430 init_completion(&task->completion);
1431 }
1432
1433 return task;
1434}
1435
1436static void isci_free_task(struct isci_host *ihost, struct sas_task *task)
1437{
1438 if (task) {
1439 BUG_ON(!list_empty(&task->list));
1440 kfree(task);
1441 }
1442}
1443
1444static int isci_smp_execute_task(struct isci_host *ihost,
1445 struct domain_device *dev, void *req,
1446 int req_size, void *resp, int resp_size)
1447{
1448 int res, retry;
1449 struct sas_task *task = NULL;
1450
1451 for (retry = 0; retry < 3; retry++) {
1452 task = isci_alloc_task();
1453 if (!task)
1454 return -ENOMEM;
1455
1456 task->dev = dev;
1457 task->task_proto = dev->tproto;
1458 sg_init_one(&task->smp_task.smp_req, req, req_size);
1459 sg_init_one(&task->smp_task.smp_resp, resp, resp_size);
1460
1461 task->task_done = isci_smp_task_done;
1462
1463 task->timer.data = (unsigned long) task;
1464 task->timer.function = isci_smp_task_timedout;
1465 task->timer.expires = jiffies + 10*HZ;
1466 add_timer(&task->timer);
1467
1468 res = isci_task_execute_task(task, 1, GFP_KERNEL);
1469
1470 if (res) {
1471 del_timer(&task->timer);
1472 dev_err(&ihost->pdev->dev,
1473 "%s: executing SMP task failed:%d\n",
1474 __func__, res);
1475 goto ex_err;
1476 }
1477
1478 wait_for_completion(&task->completion);
1479 res = -ECOMM;
1480 if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
1481 dev_err(&ihost->pdev->dev,
1482 "%s: smp task timed out or aborted\n",
1483 __func__);
1484 isci_task_abort_task(task);
1485 if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
1486 dev_err(&ihost->pdev->dev,
1487 "%s: SMP task aborted and not done\n",
1488 __func__);
1489 goto ex_err;
1490 }
1491 }
1492 if (task->task_status.resp == SAS_TASK_COMPLETE &&
1493 task->task_status.stat == SAM_STAT_GOOD) {
1494 res = 0;
1495 break;
1496 }
1497 if (task->task_status.resp == SAS_TASK_COMPLETE &&
1498 task->task_status.stat == SAS_DATA_UNDERRUN) {
1499 /* no error, but return the number of bytes of
1500 * underrun */
1501 res = task->task_status.residual;
1502 break;
1503 }
1504 if (task->task_status.resp == SAS_TASK_COMPLETE &&
1505 task->task_status.stat == SAS_DATA_OVERRUN) {
1506 res = -EMSGSIZE;
1507 break;
1508 } else {
1509 dev_err(&ihost->pdev->dev,
1510 "%s: task to dev %016llx response: 0x%x "
1511 "status 0x%x\n", __func__,
1512 SAS_ADDR(dev->sas_addr),
1513 task->task_status.resp,
1514 task->task_status.stat);
1515 isci_free_task(ihost, task);
1516 task = NULL;
1517 }
1518 }
1519ex_err:
1520 BUG_ON(retry == 3 && task != NULL);
1521 isci_free_task(ihost, task);
1522 return res;
1523}
1524
1525#define DISCOVER_REQ_SIZE 16
1526#define DISCOVER_RESP_SIZE 56
1527
1528int isci_smp_get_phy_attached_dev_type(struct isci_host *ihost,
1529 struct domain_device *dev,
1530 int phy_id, int *adt)
1531{
1532 struct smp_resp *disc_resp;
1533 u8 *disc_req;
1534 int res;
1535
1536 disc_resp = kzalloc(DISCOVER_RESP_SIZE, GFP_KERNEL);
1537 if (!disc_resp)
1538 return -ENOMEM;
1539
1540 disc_req = kzalloc(DISCOVER_REQ_SIZE, GFP_KERNEL);
1541 if (disc_req) {
1542 disc_req[0] = SMP_REQUEST;
1543 disc_req[1] = SMP_DISCOVER;
1544 disc_req[9] = phy_id;
1545 } else {
1546 kfree(disc_resp);
1547 return -ENOMEM;
1548 }
1549 res = isci_smp_execute_task(ihost, dev, disc_req, DISCOVER_REQ_SIZE,
1550 disc_resp, DISCOVER_RESP_SIZE);
1551 if (!res) {
1552 if (disc_resp->result != SMP_RESP_FUNC_ACC)
1553 res = disc_resp->result;
1554 else
1555 *adt = disc_resp->disc.attached_dev_type;
1556 }
1557 kfree(disc_req);
1558 kfree(disc_resp);
1559
1560 return res;
1561}
1562
1563static void isci_wait_for_smp_phy_reset(struct isci_remote_device *idev, int phy_num)
1564{
1565 struct domain_device *dev = idev->domain_dev;
1566 struct isci_port *iport = idev->isci_port;
1567 struct isci_host *ihost = iport->isci_host;
1568 int res, iteration = 0, attached_device_type;
1569 #define STP_WAIT_MSECS 25000
1570 unsigned long tmo = msecs_to_jiffies(STP_WAIT_MSECS);
1571 unsigned long deadline = jiffies + tmo;
1572 enum {
1573 SMP_PHYWAIT_PHYDOWN,
1574 SMP_PHYWAIT_PHYUP,
1575 SMP_PHYWAIT_DONE
1576 } phy_state = SMP_PHYWAIT_PHYDOWN;
1577
1578 /* While there is time, wait for the phy to go away and come back */
1579 while (time_is_after_jiffies(deadline) && phy_state != SMP_PHYWAIT_DONE) {
1580 int event = atomic_read(&iport->event);
1581
1582 ++iteration;
1583
1584 tmo = wait_event_timeout(ihost->eventq,
1585 event != atomic_read(&iport->event) ||
1586 !test_bit(IPORT_BCN_BLOCKED, &iport->flags),
1587 tmo);
1588 /* link down, stop polling */
1589 if (!test_bit(IPORT_BCN_BLOCKED, &iport->flags))
1590 break;
1591
1592 dev_dbg(&ihost->pdev->dev,
1593 "%s: iport %p, iteration %d,"
1594 " phase %d: time_remaining %lu, bcns = %d\n",
1595 __func__, iport, iteration, phy_state,
1596 tmo, test_bit(IPORT_BCN_PENDING, &iport->flags));
1597
1598 res = isci_smp_get_phy_attached_dev_type(ihost, dev, phy_num,
1599 &attached_device_type);
1600 tmo = deadline - jiffies;
1601
1602 if (res) {
1603 dev_warn(&ihost->pdev->dev,
1604 "%s: iteration %d, phase %d:"
1605 " SMP error=%d, time_remaining=%lu\n",
1606 __func__, iteration, phy_state, res, tmo);
1607 break;
1608 }
1609 dev_dbg(&ihost->pdev->dev,
1610 "%s: iport %p, iteration %d,"
1611 " phase %d: time_remaining %lu, bcns = %d, "
1612 "attdevtype = %x\n",
1613 __func__, iport, iteration, phy_state,
1614 tmo, test_bit(IPORT_BCN_PENDING, &iport->flags),
1615 attached_device_type);
1616
1617 switch (phy_state) {
1618 case SMP_PHYWAIT_PHYDOWN:
1619 /* Has the device gone away? */
1620 if (!attached_device_type)
1621 phy_state = SMP_PHYWAIT_PHYUP;
1622
1623 break;
1624
1625 case SMP_PHYWAIT_PHYUP:
1626 /* Has the device come back? */
1627 if (attached_device_type)
1628 phy_state = SMP_PHYWAIT_DONE;
1629 break;
1630
1631 case SMP_PHYWAIT_DONE:
1632 break;
1633 }
1634
1635 }
1636 dev_dbg(&ihost->pdev->dev, "%s: done\n", __func__);
1637}
1638
1400static int isci_reset_device(struct domain_device *dev, int hard_reset) 1639static int isci_reset_device(struct domain_device *dev, int hard_reset)
1401{ 1640{
1402 struct isci_remote_device *idev = dev->lldd_dev; 1641 struct isci_remote_device *idev = dev->lldd_dev;
1403 struct sas_phy *phy = sas_find_local_phy(dev); 1642 struct sas_phy *phy = sas_find_local_phy(dev);
1404 struct isci_host *ihost = dev_to_ihost(dev); 1643 struct isci_host *ihost = dev_to_ihost(dev);
1644 struct isci_port *iport = idev->isci_port;
1405 enum sci_status status; 1645 enum sci_status status;
1406 unsigned long flags; 1646 unsigned long flags;
1407 int rc; 1647 int rc;
@@ -1432,6 +1672,10 @@ static int isci_reset_device(struct domain_device *dev, int hard_reset)
1432 /* Make sure all pending requests are able to be fully terminated. */ 1672 /* Make sure all pending requests are able to be fully terminated. */
1433 isci_device_clear_reset_pending(ihost, idev); 1673 isci_device_clear_reset_pending(ihost, idev);
1434 1674
1675 /* If this is a device on an expander, disable BCN processing. */
1676 if (!scsi_is_sas_phy_local(phy))
1677 set_bit(IPORT_BCN_BLOCKED, &iport->flags);
1678
1435 rc = sas_phy_reset(phy, hard_reset); 1679 rc = sas_phy_reset(phy, hard_reset);
1436 1680
1437 /* Terminate in-progress I/O now. */ 1681 /* Terminate in-progress I/O now. */
@@ -1442,7 +1686,20 @@ static int isci_reset_device(struct domain_device *dev, int hard_reset)
1442 status = scic_remote_device_reset_complete(&idev->sci); 1686 status = scic_remote_device_reset_complete(&idev->sci);
1443 spin_unlock_irqrestore(&ihost->scic_lock, flags); 1687 spin_unlock_irqrestore(&ihost->scic_lock, flags);
1444 1688
1445 msleep(2000); /* just like mvsas */ 1689 /* If this is a device on an expander, bring the phy back up. */
1690 if (!scsi_is_sas_phy_local(phy)) {
1691 /* A phy reset will cause the device to go away then reappear.
1692 * Since libsas will take action on incoming BCNs (eg. remove
1693 * a device going through an SMP phy-control driven reset),
1694 * we need to wait until the phy comes back up before letting
1695 * discovery proceed in libsas.
1696 */
1697 isci_wait_for_smp_phy_reset(idev, phy->number);
1698
1699 spin_lock_irqsave(&ihost->scic_lock, flags);
1700 isci_port_bcn_enable(ihost, idev->isci_port);
1701 spin_unlock_irqrestore(&ihost->scic_lock, flags);
1702 }
1446 1703
1447 if (status != SCI_SUCCESS) { 1704 if (status != SCI_SUCCESS) {
1448 dev_warn(&ihost->pdev->dev, 1705 dev_warn(&ihost->pdev->dev,