aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/ata/libata-eh.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/ata/libata-eh.c')
-rw-r--r--drivers/ata/libata-eh.c97
1 files changed, 83 insertions, 14 deletions
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 359a5ace8473..3a2f7ef3e600 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -50,16 +50,23 @@ enum {
50 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 50 ATA_EH_SPDN_NCQ_OFF = (1 << 0),
51 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 51 ATA_EH_SPDN_SPEED_DOWN = (1 << 1),
52 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 52 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2),
53 ATA_EH_SPDN_KEEP_ERRORS = (1 << 3),
53 54
54 /* error flags */ 55 /* error flags */
55 ATA_EFLAG_IS_IO = (1 << 0), 56 ATA_EFLAG_IS_IO = (1 << 0),
57 ATA_EFLAG_DUBIOUS_XFER = (1 << 1),
56 58
57 /* error categories */ 59 /* error categories */
58 ATA_ECAT_NONE = 0, 60 ATA_ECAT_NONE = 0,
59 ATA_ECAT_ATA_BUS = 1, 61 ATA_ECAT_ATA_BUS = 1,
60 ATA_ECAT_TOUT_HSM = 2, 62 ATA_ECAT_TOUT_HSM = 2,
61 ATA_ECAT_UNK_DEV = 3, 63 ATA_ECAT_UNK_DEV = 3,
62 ATA_ECAT_NR = 4, 64 ATA_ECAT_DUBIOUS_ATA_BUS = 4,
65 ATA_ECAT_DUBIOUS_TOUT_HSM = 5,
66 ATA_ECAT_DUBIOUS_UNK_DEV = 6,
67 ATA_ECAT_NR = 7,
68
69 ATA_ECAT_DUBIOUS_BASE = ATA_ECAT_DUBIOUS_ATA_BUS,
63}; 70};
64 71
65/* Waiting in ->prereset can never be reliable. It's sometimes nice 72/* Waiting in ->prereset can never be reliable. It's sometimes nice
@@ -245,6 +252,15 @@ static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
245 ent->timestamp = get_jiffies_64(); 252 ent->timestamp = get_jiffies_64();
246} 253}
247 254
255static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
256{
257 struct ata_ering_entry *ent = &ering->ring[ering->cursor];
258
259 if (ent->err_mask)
260 return ent;
261 return NULL;
262}
263
248static void ata_ering_clear(struct ata_ering *ering) 264static void ata_ering_clear(struct ata_ering *ering)
249{ 265{
250 memset(ering, 0, sizeof(*ering)); 266 memset(ering, 0, sizeof(*ering));
@@ -1473,20 +1489,29 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
1473 return action; 1489 return action;
1474} 1490}
1475 1491
1476static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask) 1492static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
1493 int *xfer_ok)
1477{ 1494{
1495 int base = 0;
1496
1497 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
1498 *xfer_ok = 1;
1499
1500 if (!*xfer_ok)
1501 base = ATA_ECAT_DUBIOUS_BASE;
1502
1478 if (err_mask & AC_ERR_ATA_BUS) 1503 if (err_mask & AC_ERR_ATA_BUS)
1479 return ATA_ECAT_ATA_BUS; 1504 return base + ATA_ECAT_ATA_BUS;
1480 1505
1481 if (err_mask & AC_ERR_TIMEOUT) 1506 if (err_mask & AC_ERR_TIMEOUT)
1482 return ATA_ECAT_TOUT_HSM; 1507 return base + ATA_ECAT_TOUT_HSM;
1483 1508
1484 if (eflags & ATA_EFLAG_IS_IO) { 1509 if (eflags & ATA_EFLAG_IS_IO) {
1485 if (err_mask & AC_ERR_HSM) 1510 if (err_mask & AC_ERR_HSM)
1486 return ATA_ECAT_TOUT_HSM; 1511 return base + ATA_ECAT_TOUT_HSM;
1487 if ((err_mask & 1512 if ((err_mask &
1488 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1513 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
1489 return ATA_ECAT_UNK_DEV; 1514 return base + ATA_ECAT_UNK_DEV;
1490 } 1515 }
1491 1516
1492 return 0; 1517 return 0;
@@ -1494,18 +1519,22 @@ static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask)
1494 1519
1495struct speed_down_verdict_arg { 1520struct speed_down_verdict_arg {
1496 u64 since; 1521 u64 since;
1522 int xfer_ok;
1497 int nr_errors[ATA_ECAT_NR]; 1523 int nr_errors[ATA_ECAT_NR];
1498}; 1524};
1499 1525
1500static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1526static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1501{ 1527{
1502 struct speed_down_verdict_arg *arg = void_arg; 1528 struct speed_down_verdict_arg *arg = void_arg;
1503 int cat = ata_eh_categorize_error(ent->eflags, ent->err_mask); 1529 int cat;
1504 1530
1505 if (ent->timestamp < arg->since) 1531 if (ent->timestamp < arg->since)
1506 return -1; 1532 return -1;
1507 1533
1534 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
1535 &arg->xfer_ok);
1508 arg->nr_errors[cat]++; 1536 arg->nr_errors[cat]++;
1537
1509 return 0; 1538 return 0;
1510} 1539}
1511 1540
@@ -1524,6 +1553,9 @@ static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1524 * 1553 *
1525 * ECAT_UNK_DEV : Unknown DEV error for IO commands 1554 * ECAT_UNK_DEV : Unknown DEV error for IO commands
1526 * 1555 *
1556 * ECAT_DUBIOUS_* : Identical to above three but occurred while
1557 * data transfer hasn't been verified.
1558 *
1527 * Verdicts are 1559 * Verdicts are
1528 * 1560 *
1529 * NCQ_OFF : Turn off NCQ. 1561 * NCQ_OFF : Turn off NCQ.
@@ -1534,15 +1566,27 @@ static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1534 * FALLBACK_TO_PIO : Fall back to PIO. 1566 * FALLBACK_TO_PIO : Fall back to PIO.
1535 * 1567 *
1536 * Even if multiple verdicts are returned, only one action is 1568 * Even if multiple verdicts are returned, only one action is
1537 * taken per error. ering is cleared after an action is taken. 1569 * taken per error. An action triggered by non-DUBIOUS errors
1570 * clears ering, while one triggered by DUBIOUS_* errors doesn't.
1571 * This is to expedite speed down decisions right after device is
1572 * initially configured.
1573 *
1574 * The following are the speed down rules. #1 and #2 deal with
1575 * DUBIOUS errors.
1538 * 1576 *
1539 * 1. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors 1577 * 1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
1578 * occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
1579 *
1580 * 2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
1581 * occurred during last 5 mins, NCQ_OFF.
1582 *
1583 * 3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
1540 * occurred during last 5 mins, FALLBACK_TO_PIO 1584 * occurred during last 5 mins, FALLBACK_TO_PIO
1541 * 1585 *
1542 * 2. If more than 3 TOUT_HSM or UNK_DEV errors occurred 1586 * 4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
1543 * during last 10 mins, NCQ_OFF. 1587 * during last 10 mins, NCQ_OFF.
1544 * 1588 *
1545 * 3. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6 1589 * 5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
1546 * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN. 1590 * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
1547 * 1591 *
1548 * LOCKING: 1592 * LOCKING:
@@ -1563,6 +1607,15 @@ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
1563 arg.since = j64 - min(j64, j5mins); 1607 arg.since = j64 - min(j64, j5mins);
1564 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1608 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1565 1609
1610 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
1611 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
1612 verdict |= ATA_EH_SPDN_SPEED_DOWN |
1613 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;
1614
1615 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
1616 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
1617 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;
1618
1566 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1619 if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
1567 arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1620 arg.nr_errors[ATA_ECAT_TOUT_HSM] +
1568 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1621 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
@@ -1606,11 +1659,12 @@ static unsigned int ata_eh_speed_down(struct ata_device *dev,
1606 unsigned int eflags, unsigned int err_mask) 1659 unsigned int eflags, unsigned int err_mask)
1607{ 1660{
1608 struct ata_link *link = dev->link; 1661 struct ata_link *link = dev->link;
1662 int xfer_ok = 0;
1609 unsigned int verdict; 1663 unsigned int verdict;
1610 unsigned int action = 0; 1664 unsigned int action = 0;
1611 1665
1612 /* don't bother if Cat-0 error */ 1666 /* don't bother if Cat-0 error */
1613 if (ata_eh_categorize_error(eflags, err_mask) == 0) 1667 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
1614 return 0; 1668 return 0;
1615 1669
1616 /* record error and determine whether speed down is necessary */ 1670 /* record error and determine whether speed down is necessary */
@@ -1673,7 +1727,8 @@ static unsigned int ata_eh_speed_down(struct ata_device *dev,
1673 return 0; 1727 return 0;
1674 done: 1728 done:
1675 /* device has been slowed down, blow error history */ 1729 /* device has been slowed down, blow error history */
1676 ata_ering_clear(&dev->ering); 1730 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
1731 ata_ering_clear(&dev->ering);
1677 return action; 1732 return action;
1678} 1733}
1679 1734
@@ -1783,8 +1838,11 @@ static void ata_eh_link_autopsy(struct ata_link *link)
1783 ata_dev_enabled(link->device)))) 1838 ata_dev_enabled(link->device))))
1784 dev = link->device; 1839 dev = link->device;
1785 1840
1786 if (dev) 1841 if (dev) {
1842 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
1843 eflags |= ATA_EFLAG_DUBIOUS_XFER;
1787 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 1844 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
1845 }
1788 1846
1789 DPRINTK("EXIT\n"); 1847 DPRINTK("EXIT\n");
1790} 1848}
@@ -2390,6 +2448,17 @@ int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
2390 struct ata_device *dev; 2448 struct ata_device *dev;
2391 int rc; 2449 int rc;
2392 2450
2451 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */
2452 ata_link_for_each_dev(dev, link) {
2453 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) {
2454 struct ata_ering_entry *ent;
2455
2456 ent = ata_ering_top(&dev->ering);
2457 if (ent)
2458 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER;
2459 }
2460 }
2461
2393 /* has private set_mode? */ 2462 /* has private set_mode? */
2394 if (ap->ops->set_mode) 2463 if (ap->ops->set_mode)
2395 rc = ap->ops->set_mode(link, r_failed_dev); 2464 rc = ap->ops->set_mode(link, r_failed_dev);