aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/ata/libata-eh.c
diff options
context:
space:
mode:
author Tejun Heo <htejun@gmail.com> 2007-11-27 05:28:59 -0500
committer Jeff Garzik <jeff@garzik.org> 2008-01-23 05:24:11 -0500
commit 76326ac1ac1f524014ef36986fed97796b28ec6b (patch)
tree 56c662db2f65303e256fedac9a26362b921d759c /drivers/ata/libata-eh.c
parent 00115e0f5bc3bfdf3f3855ad89c8895f10458f92 (diff)
libata: implement fast speed down for unverified data transfer mode
It's very likely that the configured data transfer mode is the wrong one if a device fails data transfers right after initial data transfer mode configuration (including NCQ on/off and xfermode). libata EH needs to speed down fast before upper layers give up on probing. This patch implements fast speed down rules to handle such cases better. Errors that occur while data transfer hasn't been verified trigger fast back-to-back speed down actions until data transfer works. This change allows cable mis-detection and other initial configuration problems to be corrected before the partition scanning code gives up. Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Jeff Garzik <jeff@garzik.org>
Diffstat (limited to 'drivers/ata/libata-eh.c')
-rw-r--r-- drivers/ata/libata-eh.c 97
1 files changed, 83 insertions, 14 deletions
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 359a5ace8473..3a2f7ef3e600 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -50,16 +50,23 @@ enum {
50 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 50 ATA_EH_SPDN_NCQ_OFF = (1 << 0),
51 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 51 ATA_EH_SPDN_SPEED_DOWN = (1 << 1),
52 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 52 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2),
53 ATA_EH_SPDN_KEEP_ERRORS = (1 << 3),
53 54
54 /* error flags */ 55 /* error flags */
55 ATA_EFLAG_IS_IO = (1 << 0), 56 ATA_EFLAG_IS_IO = (1 << 0),
57 ATA_EFLAG_DUBIOUS_XFER = (1 << 1),
56 58
57 /* error categories */ 59 /* error categories */
58 ATA_ECAT_NONE = 0, 60 ATA_ECAT_NONE = 0,
59 ATA_ECAT_ATA_BUS = 1, 61 ATA_ECAT_ATA_BUS = 1,
60 ATA_ECAT_TOUT_HSM = 2, 62 ATA_ECAT_TOUT_HSM = 2,
61 ATA_ECAT_UNK_DEV = 3, 63 ATA_ECAT_UNK_DEV = 3,
62 ATA_ECAT_NR = 4, 64 ATA_ECAT_DUBIOUS_ATA_BUS = 4,
65 ATA_ECAT_DUBIOUS_TOUT_HSM = 5,
66 ATA_ECAT_DUBIOUS_UNK_DEV = 6,
67 ATA_ECAT_NR = 7,
68
69 ATA_ECAT_DUBIOUS_BASE = ATA_ECAT_DUBIOUS_ATA_BUS,
63}; 70};
64 71
65/* Waiting in ->prereset can never be reliable. It's sometimes nice 72/* Waiting in ->prereset can never be reliable. It's sometimes nice
@@ -245,6 +252,15 @@ static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
245 ent->timestamp = get_jiffies_64(); 252 ent->timestamp = get_jiffies_64();
246} 253}
247 254
255static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
256{
257 struct ata_ering_entry *ent = &ering->ring[ering->cursor];
258
259 if (ent->err_mask)
260 return ent;
261 return NULL;
262}
263
248static void ata_ering_clear(struct ata_ering *ering) 264static void ata_ering_clear(struct ata_ering *ering)
249{ 265{
250 memset(ering, 0, sizeof(*ering)); 266 memset(ering, 0, sizeof(*ering));
@@ -1473,20 +1489,29 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
1473 return action; 1489 return action;
1474} 1490}
1475 1491
1476static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask) 1492static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
1493 int *xfer_ok)
1477{ 1494{
1495 int base = 0;
1496
1497 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
1498 *xfer_ok = 1;
1499
1500 if (!*xfer_ok)
1501 base = ATA_ECAT_DUBIOUS_BASE;
1502
1478 if (err_mask & AC_ERR_ATA_BUS) 1503 if (err_mask & AC_ERR_ATA_BUS)
1479 return ATA_ECAT_ATA_BUS; 1504 return base + ATA_ECAT_ATA_BUS;
1480 1505
1481 if (err_mask & AC_ERR_TIMEOUT) 1506 if (err_mask & AC_ERR_TIMEOUT)
1482 return ATA_ECAT_TOUT_HSM; 1507 return base + ATA_ECAT_TOUT_HSM;
1483 1508
1484 if (eflags & ATA_EFLAG_IS_IO) { 1509 if (eflags & ATA_EFLAG_IS_IO) {
1485 if (err_mask & AC_ERR_HSM) 1510 if (err_mask & AC_ERR_HSM)
1486 return ATA_ECAT_TOUT_HSM; 1511 return base + ATA_ECAT_TOUT_HSM;
1487 if ((err_mask & 1512 if ((err_mask &
1488 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1513 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
1489 return ATA_ECAT_UNK_DEV; 1514 return base + ATA_ECAT_UNK_DEV;
1490 } 1515 }
1491 1516
1492 return 0; 1517 return 0;
@@ -1494,18 +1519,22 @@ static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask)
1494 1519
1495struct speed_down_verdict_arg { 1520struct speed_down_verdict_arg {
1496 u64 since; 1521 u64 since;
1522 int xfer_ok;
1497 int nr_errors[ATA_ECAT_NR]; 1523 int nr_errors[ATA_ECAT_NR];
1498}; 1524};
1499 1525
1500static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1526static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1501{ 1527{
1502 struct speed_down_verdict_arg *arg = void_arg; 1528 struct speed_down_verdict_arg *arg = void_arg;
1503 int cat = ata_eh_categorize_error(ent->eflags, ent->err_mask); 1529 int cat;
1504 1530
1505 if (ent->timestamp < arg->since) 1531 if (ent->timestamp < arg->since)
1506 return -1; 1532 return -1;
1507 1533
1534 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
1535 &arg->xfer_ok);
1508 arg->nr_errors[cat]++; 1536 arg->nr_errors[cat]++;
1537
1509 return 0; 1538 return 0;
1510} 1539}
1511 1540
@@ -1524,6 +1553,9 @@ static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1524 * 1553 *
1525 * ECAT_UNK_DEV : Unknown DEV error for IO commands 1554 * ECAT_UNK_DEV : Unknown DEV error for IO commands
1526 * 1555 *
1556 * ECAT_DUBIOUS_* : Identical to above three but occurred while
1557 * data transfer hasn't been verified.
1558 *
1527 * Verdicts are 1559 * Verdicts are
1528 * 1560 *
1529 * NCQ_OFF : Turn off NCQ. 1561 * NCQ_OFF : Turn off NCQ.
@@ -1534,15 +1566,27 @@ static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1534 * FALLBACK_TO_PIO : Fall back to PIO. 1566 * FALLBACK_TO_PIO : Fall back to PIO.
1535 * 1567 *
1536 * Even if multiple verdicts are returned, only one action is 1568 * Even if multiple verdicts are returned, only one action is
1537 * taken per error. ering is cleared after an action is taken. 1569 * taken per error. An action triggered by non-DUBIOUS errors
1570 * clears ering, while one triggered by DUBIOUS_* errors doesn't.
1571 * This is to expedite speed down decisions right after device is
1572 * initially configured.
1573 *
1574 * The followings are speed down rules. #1 and #2 deal with
1575 * DUBIOUS errors.
1538 * 1576 *
1539 * 1. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors 1577 * 1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
1578 * occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
1579 *
1580 * 2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
1581 * occurred during last 5 mins, NCQ_OFF.
1582 *
1583 * 3. If more than 8 ATA_BUS, TOUT_HSM or UNK_DEV errors
1540 * ocurred during last 5 mins, FALLBACK_TO_PIO 1584 * ocurred during last 5 mins, FALLBACK_TO_PIO
1541 * 1585 *
1542 * 2. If more than 3 TOUT_HSM or UNK_DEV errors occurred 1586 * 4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
1543 * during last 10 mins, NCQ_OFF. 1587 * during last 10 mins, NCQ_OFF.
1544 * 1588 *
1545 * 3. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6 1589 * 5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
1546 * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN. 1590 * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
1547 * 1591 *
1548 * LOCKING: 1592 * LOCKING:
@@ -1563,6 +1607,15 @@ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
1563 arg.since = j64 - min(j64, j5mins); 1607 arg.since = j64 - min(j64, j5mins);
1564 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1608 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1565 1609
1610 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
1611 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
1612 verdict |= ATA_EH_SPDN_SPEED_DOWN |
1613 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;
1614
1615 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
1616 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
1617 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;
1618
1566 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1619 if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
1567 arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1620 arg.nr_errors[ATA_ECAT_TOUT_HSM] +
1568 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1621 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
@@ -1606,11 +1659,12 @@ static unsigned int ata_eh_speed_down(struct ata_device *dev,
1606 unsigned int eflags, unsigned int err_mask) 1659 unsigned int eflags, unsigned int err_mask)
1607{ 1660{
1608 struct ata_link *link = dev->link; 1661 struct ata_link *link = dev->link;
1662 int xfer_ok = 0;
1609 unsigned int verdict; 1663 unsigned int verdict;
1610 unsigned int action = 0; 1664 unsigned int action = 0;
1611 1665
1612 /* don't bother if Cat-0 error */ 1666 /* don't bother if Cat-0 error */
1613 if (ata_eh_categorize_error(eflags, err_mask) == 0) 1667 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
1614 return 0; 1668 return 0;
1615 1669
1616 /* record error and determine whether speed down is necessary */ 1670 /* record error and determine whether speed down is necessary */
@@ -1673,7 +1727,8 @@ static unsigned int ata_eh_speed_down(struct ata_device *dev,
1673 return 0; 1727 return 0;
1674 done: 1728 done:
1675 /* device has been slowed down, blow error history */ 1729 /* device has been slowed down, blow error history */
1676 ata_ering_clear(&dev->ering); 1730 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
1731 ata_ering_clear(&dev->ering);
1677 return action; 1732 return action;
1678} 1733}
1679 1734
@@ -1783,8 +1838,11 @@ static void ata_eh_link_autopsy(struct ata_link *link)
1783 ata_dev_enabled(link->device)))) 1838 ata_dev_enabled(link->device))))
1784 dev = link->device; 1839 dev = link->device;
1785 1840
1786 if (dev) 1841 if (dev) {
1842 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
1843 eflags |= ATA_EFLAG_DUBIOUS_XFER;
1787 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 1844 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
1845 }
1788 1846
1789 DPRINTK("EXIT\n"); 1847 DPRINTK("EXIT\n");
1790} 1848}
@@ -2390,6 +2448,17 @@ int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
2390 struct ata_device *dev; 2448 struct ata_device *dev;
2391 int rc; 2449 int rc;
2392 2450
2451 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */
2452 ata_link_for_each_dev(dev, link) {
2453 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) {
2454 struct ata_ering_entry *ent;
2455
2456 ent = ata_ering_top(&dev->ering);
2457 if (ent)
2458 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER;
2459 }
2460 }
2461
2393 /* has private set_mode? */ 2462 /* has private set_mode? */
2394 if (ap->ops->set_mode) 2463 if (ap->ops->set_mode)
2395 rc = ap->ops->set_mode(link, r_failed_dev); 2464 rc = ap->ops->set_mode(link, r_failed_dev);