about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/ata/libata-eh.c 117
-rw-r--r-- include/linux/libata.h 2
2 files changed, 74 insertions, 45 deletions
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index ebab75958900..b01ade102727 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -46,9 +46,20 @@
46#include "libata.h" 46#include "libata.h"
47 47
48enum { 48enum {
49 /* speed down verdicts */
49 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 50 ATA_EH_SPDN_NCQ_OFF = (1 << 0),
50 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 51 ATA_EH_SPDN_SPEED_DOWN = (1 << 1),
51 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 52 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2),
53
54 /* error flags */
55 ATA_EFLAG_IS_IO = (1 << 0),
56
57 /* error categories */
58 ATA_ECAT_NONE = 0,
59 ATA_ECAT_ATA_BUS = 1,
60 ATA_ECAT_TOUT_HSM = 2,
61 ATA_ECAT_UNK_DEV = 3,
62 ATA_ECAT_NR = 4,
52}; 63};
53 64
54/* Waiting in ->prereset can never be reliable. It's sometimes nice 65/* Waiting in ->prereset can never be reliable. It's sometimes nice
@@ -218,7 +229,7 @@ void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
218 229
219#endif /* CONFIG_PCI */ 230#endif /* CONFIG_PCI */
220 231
221static void ata_ering_record(struct ata_ering *ering, int is_io, 232static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
222 unsigned int err_mask) 233 unsigned int err_mask)
223{ 234{
224 struct ata_ering_entry *ent; 235 struct ata_ering_entry *ent;
@@ -229,7 +240,7 @@ static void ata_ering_record(struct ata_ering *ering, int is_io,
229 ering->cursor %= ATA_ERING_SIZE; 240 ering->cursor %= ATA_ERING_SIZE;
230 241
231 ent = &ering->ring[ering->cursor]; 242 ent = &ering->ring[ering->cursor];
232 ent->is_io = is_io; 243 ent->eflags = eflags;
233 ent->err_mask = err_mask; 244 ent->err_mask = err_mask;
234 ent->timestamp = get_jiffies_64(); 245 ent->timestamp = get_jiffies_64();
235} 246}
@@ -1451,20 +1462,20 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
1451 return action; 1462 return action;
1452} 1463}
1453 1464
1454static int ata_eh_categorize_error(int is_io, unsigned int err_mask) 1465static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask)
1455{ 1466{
1456 if (err_mask & AC_ERR_ATA_BUS) 1467 if (err_mask & AC_ERR_ATA_BUS)
1457 return 1; 1468 return ATA_ECAT_ATA_BUS;
1458 1469
1459 if (err_mask & AC_ERR_TIMEOUT) 1470 if (err_mask & AC_ERR_TIMEOUT)
1460 return 2; 1471 return ATA_ECAT_TOUT_HSM;
1461 1472
1462 if (is_io) { 1473 if (eflags & ATA_EFLAG_IS_IO) {
1463 if (err_mask & AC_ERR_HSM) 1474 if (err_mask & AC_ERR_HSM)
1464 return 2; 1475 return ATA_ECAT_TOUT_HSM;
1465 if ((err_mask & 1476 if ((err_mask &
1466 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1477 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
1467 return 3; 1478 return ATA_ECAT_UNK_DEV;
1468 } 1479 }
1469 1480
1470 return 0; 1481 return 0;
@@ -1472,13 +1483,13 @@ static int ata_eh_categorize_error(int is_io, unsigned int err_mask)
1472 1483
1473struct speed_down_verdict_arg { 1484struct speed_down_verdict_arg {
1474 u64 since; 1485 u64 since;
1475 int nr_errors[4]; 1486 int nr_errors[ATA_ECAT_NR];
1476}; 1487};
1477 1488
1478static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1489static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1479{ 1490{
1480 struct speed_down_verdict_arg *arg = void_arg; 1491 struct speed_down_verdict_arg *arg = void_arg;
1481 int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask); 1492 int cat = ata_eh_categorize_error(ent->eflags, ent->err_mask);
1482 1493
1483 if (ent->timestamp < arg->since) 1494 if (ent->timestamp < arg->since)
1484 return -1; 1495 return -1;
@@ -1495,22 +1506,33 @@ static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1495 * whether NCQ needs to be turned off, transfer speed should be 1506 * whether NCQ needs to be turned off, transfer speed should be
1496 * stepped down, or falling back to PIO is necessary. 1507 * stepped down, or falling back to PIO is necessary.
1497 * 1508 *
1498 * Cat-1 is ATA_BUS error for any command. 1509 * ECAT_ATA_BUS : ATA_BUS error for any command
1510 *
1511 * ECAT_TOUT_HSM : TIMEOUT for any command or HSM violation for
1512 * IO commands
1513 *
1514 * ECAT_UNK_DEV : Unknown DEV error for IO commands
1515 *
1516 * Verdicts are
1499 * 1517 *
1500 * Cat-2 is TIMEOUT for any command or HSM violation for known 1518 * NCQ_OFF : Turn off NCQ.
1501 * supported commands.
1502 * 1519 *
1503 * Cat-3 is unclassified DEV error for known supported 1520 * SPEED_DOWN : Speed down transfer speed but don't fall back
1504 * command. 1521 * to PIO.
1505 * 1522 *
1506 * NCQ needs to be turned off if there have been more than 3 1523 * FALLBACK_TO_PIO : Fall back to PIO.
1507 * Cat-2 + Cat-3 errors during last 10 minutes.
1508 * 1524 *
1509 * Speed down is necessary if there have been more than 3 Cat-1 + 1525 * Even if multiple verdicts are returned, only one action is
1510 * Cat-2 errors or 10 Cat-3 errors during last 10 minutes. 1526 * taken per error. ering is cleared after an action is taken.
1511 * 1527 *
1512 * Falling back to PIO mode is necessary if there have been more 1528 * 1. If more than 10 ATA_BUS, TOUT_HSM or UNK_DEV errors
1513 * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes. 1529 * occurred during last 5 mins, FALLBACK_TO_PIO
1530 *
1531 * 2. If more than 3 TOUT_HSM or UNK_DEV errors occurred
1532 * during last 10 mins, NCQ_OFF.
1533 *
1534 * 3. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 10
1535 * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
1514 * 1536 *
1515 * LOCKING: 1537 * LOCKING:
1516 * Inherited from caller. 1538 * Inherited from caller.
@@ -1525,23 +1547,29 @@ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
1525 struct speed_down_verdict_arg arg; 1547 struct speed_down_verdict_arg arg;
1526 unsigned int verdict = 0; 1548 unsigned int verdict = 0;
1527 1549
1528 /* scan past 10 mins of error history */ 1550 /* scan past 5 mins of error history */
1529 memset(&arg, 0, sizeof(arg)); 1551 memset(&arg, 0, sizeof(arg));
1530 arg.since = j64 - min(j64, j10mins); 1552 arg.since = j64 - min(j64, j5mins);
1531 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1553 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1532 1554
1533 if (arg.nr_errors[2] + arg.nr_errors[3] > 3) 1555 if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
1534 verdict |= ATA_EH_SPDN_NCQ_OFF; 1556 arg.nr_errors[ATA_ECAT_TOUT_HSM] +
1535 if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10) 1557 arg.nr_errors[ATA_ECAT_UNK_DEV] > 10)
1536 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1558 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;
1537 1559
1538 /* scan past 3 mins of error history */ 1560 /* scan past 10 mins of error history */
1539 memset(&arg, 0, sizeof(arg)); 1561 memset(&arg, 0, sizeof(arg));
1540 arg.since = j64 - min(j64, j5mins); 1562 arg.since = j64 - min(j64, j10mins);
1541 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1563 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1542 1564
1543 if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10) 1565 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
1544 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1566 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
1567 verdict |= ATA_EH_SPDN_NCQ_OFF;
1568
1569 if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
1570 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
1571 arg.nr_errors[ATA_ECAT_UNK_DEV] > 10)
1572 verdict |= ATA_EH_SPDN_SPEED_DOWN;
1545 1573
1546 return verdict; 1574 return verdict;
1547} 1575}
@@ -1549,7 +1577,7 @@ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
1549/** 1577/**
1550 * ata_eh_speed_down - record error and speed down if necessary 1578 * ata_eh_speed_down - record error and speed down if necessary
1551 * @dev: Failed device 1579 * @dev: Failed device
1552 * @is_io: Did the device fail during normal IO? 1580 * @eflags: mask of ATA_EFLAG_* flags
1553 * @err_mask: err_mask of the error 1581 * @err_mask: err_mask of the error
1554 * 1582 *
1555 * Record error and examine error history to determine whether 1583 * Record error and examine error history to determine whether
@@ -1563,18 +1591,19 @@ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
1563 * RETURNS: 1591 * RETURNS:
1564 * Determined recovery action. 1592 * Determined recovery action.
1565 */ 1593 */
1566static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io, 1594static unsigned int ata_eh_speed_down(struct ata_device *dev,
1567 unsigned int err_mask) 1595 unsigned int eflags, unsigned int err_mask)
1568{ 1596{
1597 struct ata_link *link = dev->link;
1569 unsigned int verdict; 1598 unsigned int verdict;
1570 unsigned int action = 0; 1599 unsigned int action = 0;
1571 1600
1572 /* don't bother if Cat-0 error */ 1601 /* don't bother if Cat-0 error */
1573 if (ata_eh_categorize_error(is_io, err_mask) == 0) 1602 if (ata_eh_categorize_error(eflags, err_mask) == 0)
1574 return 0; 1603 return 0;
1575 1604
1576 /* record error and determine whether speed down is necessary */ 1605 /* record error and determine whether speed down is necessary */
1577 ata_ering_record(&dev->ering, is_io, err_mask); 1606 ata_ering_record(&dev->ering, eflags, err_mask);
1578 verdict = ata_eh_speed_down_verdict(dev); 1607 verdict = ata_eh_speed_down_verdict(dev);
1579 1608
1580 /* turn off NCQ? */ 1609 /* turn off NCQ? */
@@ -1590,7 +1619,7 @@ static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io,
1590 /* speed down? */ 1619 /* speed down? */
1591 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 1620 if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
1592 /* speed down SATA link speed if possible */ 1621 /* speed down SATA link speed if possible */
1593 if (sata_down_spd_limit(dev->link) == 0) { 1622 if (sata_down_spd_limit(link) == 0) {
1594 action |= ATA_EH_HARDRESET; 1623 action |= ATA_EH_HARDRESET;
1595 goto done; 1624 goto done;
1596 } 1625 }
@@ -1621,7 +1650,7 @@ static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io,
1621 * SATA. Consider it only for PATA. 1650 * SATA. Consider it only for PATA.
1622 */ 1651 */
1623 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 1652 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
1624 (dev->link->ap->cbl != ATA_CBL_SATA) && 1653 (link->ap->cbl != ATA_CBL_SATA) &&
1625 (dev->xfer_shift != ATA_SHIFT_PIO)) { 1654 (dev->xfer_shift != ATA_SHIFT_PIO)) {
1626 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 1655 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
1627 dev->spdn_cnt = 0; 1656 dev->spdn_cnt = 0;
@@ -1653,8 +1682,8 @@ static void ata_eh_link_autopsy(struct ata_link *link)
1653 struct ata_port *ap = link->ap; 1682 struct ata_port *ap = link->ap;
1654 struct ata_eh_context *ehc = &link->eh_context; 1683 struct ata_eh_context *ehc = &link->eh_context;
1655 struct ata_device *dev; 1684 struct ata_device *dev;
1656 unsigned int all_err_mask = 0; 1685 unsigned int all_err_mask = 0, eflags = 0;
1657 int tag, is_io = 0; 1686 int tag;
1658 u32 serror; 1687 u32 serror;
1659 int rc; 1688 int rc;
1660 1689
@@ -1713,15 +1742,15 @@ static void ata_eh_link_autopsy(struct ata_link *link)
1713 ehc->i.dev = qc->dev; 1742 ehc->i.dev = qc->dev;
1714 all_err_mask |= qc->err_mask; 1743 all_err_mask |= qc->err_mask;
1715 if (qc->flags & ATA_QCFLAG_IO) 1744 if (qc->flags & ATA_QCFLAG_IO)
1716 is_io = 1; 1745 eflags |= ATA_EFLAG_IS_IO;
1717 } 1746 }
1718 1747
1719 /* enforce default EH actions */ 1748 /* enforce default EH actions */
1720 if (ap->pflags & ATA_PFLAG_FROZEN || 1749 if (ap->pflags & ATA_PFLAG_FROZEN ||
1721 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 1750 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
1722 ehc->i.action |= ATA_EH_SOFTRESET; 1751 ehc->i.action |= ATA_EH_SOFTRESET;
1723 else if ((is_io && all_err_mask) || 1752 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) ||
1724 (!is_io && (all_err_mask & ~AC_ERR_DEV))) 1753 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV)))
1725 ehc->i.action |= ATA_EH_REVALIDATE; 1754 ehc->i.action |= ATA_EH_REVALIDATE;
1726 1755
1727 /* If we have offending qcs and the associated failed device, 1756 /* If we have offending qcs and the associated failed device,
@@ -1744,7 +1773,7 @@ static void ata_eh_link_autopsy(struct ata_link *link)
1744 dev = link->device; 1773 dev = link->device;
1745 1774
1746 if (dev) 1775 if (dev)
1747 ehc->i.action |= ata_eh_speed_down(dev, is_io, all_err_mask); 1776 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
1748 1777
1749 DPRINTK("EXIT\n"); 1778 DPRINTK("EXIT\n");
1750} 1779}
diff --git a/include/linux/libata.h b/include/linux/libata.h
index ca347b018649..74f1255e2524 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -482,7 +482,7 @@ struct ata_port_stats {
482}; 482};
483 483
484struct ata_ering_entry { 484struct ata_ering_entry {
485 int is_io; 485 unsigned int eflags;
486 unsigned int err_mask; 486 unsigned int err_mask;
487 u64 timestamp; 487 u64 timestamp;
488}; 488};