diff options
-rw-r--r-- | drivers/ata/libata-eh.c | 117 | ||||
-rw-r--r-- | include/linux/libata.h | 2 |
2 files changed, 74 insertions, 45 deletions
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index ebab75958900..b01ade102727 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c | |||
@@ -46,9 +46,20 @@ | |||
46 | #include "libata.h" | 46 | #include "libata.h" |
47 | 47 | ||
48 | enum { | 48 | enum { |
49 | /* speed down verdicts */ | ||
49 | ATA_EH_SPDN_NCQ_OFF = (1 << 0), | 50 | ATA_EH_SPDN_NCQ_OFF = (1 << 0), |
50 | ATA_EH_SPDN_SPEED_DOWN = (1 << 1), | 51 | ATA_EH_SPDN_SPEED_DOWN = (1 << 1), |
51 | ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), | 52 | ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), |
53 | |||
54 | /* error flags */ | ||
55 | ATA_EFLAG_IS_IO = (1 << 0), | ||
56 | |||
57 | /* error categories */ | ||
58 | ATA_ECAT_NONE = 0, | ||
59 | ATA_ECAT_ATA_BUS = 1, | ||
60 | ATA_ECAT_TOUT_HSM = 2, | ||
61 | ATA_ECAT_UNK_DEV = 3, | ||
62 | ATA_ECAT_NR = 4, | ||
52 | }; | 63 | }; |
53 | 64 | ||
54 | /* Waiting in ->prereset can never be reliable. It's sometimes nice | 65 | /* Waiting in ->prereset can never be reliable. It's sometimes nice |
@@ -218,7 +229,7 @@ void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, | |||
218 | 229 | ||
219 | #endif /* CONFIG_PCI */ | 230 | #endif /* CONFIG_PCI */ |
220 | 231 | ||
221 | static void ata_ering_record(struct ata_ering *ering, int is_io, | 232 | static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, |
222 | unsigned int err_mask) | 233 | unsigned int err_mask) |
223 | { | 234 | { |
224 | struct ata_ering_entry *ent; | 235 | struct ata_ering_entry *ent; |
@@ -229,7 +240,7 @@ static void ata_ering_record(struct ata_ering *ering, int is_io, | |||
229 | ering->cursor %= ATA_ERING_SIZE; | 240 | ering->cursor %= ATA_ERING_SIZE; |
230 | 241 | ||
231 | ent = &ering->ring[ering->cursor]; | 242 | ent = &ering->ring[ering->cursor]; |
232 | ent->is_io = is_io; | 243 | ent->eflags = eflags; |
233 | ent->err_mask = err_mask; | 244 | ent->err_mask = err_mask; |
234 | ent->timestamp = get_jiffies_64(); | 245 | ent->timestamp = get_jiffies_64(); |
235 | } | 246 | } |
@@ -1451,20 +1462,20 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, | |||
1451 | return action; | 1462 | return action; |
1452 | } | 1463 | } |
1453 | 1464 | ||
1454 | static int ata_eh_categorize_error(int is_io, unsigned int err_mask) | 1465 | static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask) |
1455 | { | 1466 | { |
1456 | if (err_mask & AC_ERR_ATA_BUS) | 1467 | if (err_mask & AC_ERR_ATA_BUS) |
1457 | return 1; | 1468 | return ATA_ECAT_ATA_BUS; |
1458 | 1469 | ||
1459 | if (err_mask & AC_ERR_TIMEOUT) | 1470 | if (err_mask & AC_ERR_TIMEOUT) |
1460 | return 2; | 1471 | return ATA_ECAT_TOUT_HSM; |
1461 | 1472 | ||
1462 | if (is_io) { | 1473 | if (eflags & ATA_EFLAG_IS_IO) { |
1463 | if (err_mask & AC_ERR_HSM) | 1474 | if (err_mask & AC_ERR_HSM) |
1464 | return 2; | 1475 | return ATA_ECAT_TOUT_HSM; |
1465 | if ((err_mask & | 1476 | if ((err_mask & |
1466 | (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) | 1477 | (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) |
1467 | return 3; | 1478 | return ATA_ECAT_UNK_DEV; |
1468 | } | 1479 | } |
1469 | 1480 | ||
1470 | return 0; | 1481 | return 0; |
@@ -1472,13 +1483,13 @@ static int ata_eh_categorize_error(int is_io, unsigned int err_mask) | |||
1472 | 1483 | ||
1473 | struct speed_down_verdict_arg { | 1484 | struct speed_down_verdict_arg { |
1474 | u64 since; | 1485 | u64 since; |
1475 | int nr_errors[4]; | 1486 | int nr_errors[ATA_ECAT_NR]; |
1476 | }; | 1487 | }; |
1477 | 1488 | ||
1478 | static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) | 1489 | static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) |
1479 | { | 1490 | { |
1480 | struct speed_down_verdict_arg *arg = void_arg; | 1491 | struct speed_down_verdict_arg *arg = void_arg; |
1481 | int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask); | 1492 | int cat = ata_eh_categorize_error(ent->eflags, ent->err_mask); |
1482 | 1493 | ||
1483 | if (ent->timestamp < arg->since) | 1494 | if (ent->timestamp < arg->since) |
1484 | return -1; | 1495 | return -1; |
@@ -1495,22 +1506,33 @@ static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) | |||
1495 | * whether NCQ needs to be turned off, transfer speed should be | 1506 | * whether NCQ needs to be turned off, transfer speed should be |
1496 | * stepped down, or falling back to PIO is necessary. | 1507 | * stepped down, or falling back to PIO is necessary. |
1497 | * | 1508 | * |
1498 | * Cat-1 is ATA_BUS error for any command. | 1509 | * ECAT_ATA_BUS : ATA_BUS error for any command |
1510 | * | ||
1511 | * ECAT_TOUT_HSM : TIMEOUT for any command or HSM violation for | ||
1512 | * IO commands | ||
1513 | * | ||
1514 | * ECAT_UNK_DEV : Unknown DEV error for IO commands | ||
1515 | * | ||
1516 | * Verdicts are | ||
1499 | * | 1517 | * |
1500 | * Cat-2 is TIMEOUT for any command or HSM violation for known | 1518 | * NCQ_OFF : Turn off NCQ. |
1501 | * supported commands. | ||
1502 | * | 1519 | * |
1503 | * Cat-3 is unclassified DEV error for known supported | 1520 | * SPEED_DOWN : Speed down transfer speed but don't fall back |
1504 | * command. | 1521 | * to PIO. |
1505 | * | 1522 | * |
1506 | * NCQ needs to be turned off if there have been more than 3 | 1523 | * FALLBACK_TO_PIO : Fall back to PIO. |
1507 | * Cat-2 + Cat-3 errors during last 10 minutes. | ||
1508 | * | 1524 | * |
1509 | * Speed down is necessary if there have been more than 3 Cat-1 + | 1525 | * Even if multiple verdicts are returned, only one action is |
1510 | * Cat-2 errors or 10 Cat-3 errors during last 10 minutes. | 1526 | * taken per error. ering is cleared after an action is taken. |
1511 | * | 1527 | * |
1512 | * Falling back to PIO mode is necessary if there have been more | 1528 | * 1. If more than 10 ATA_BUS, TOUT_HSM or UNK_DEV errors |
1513 | * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes. | 1529 | * occurred during last 5 mins, FALLBACK_TO_PIO |
1530 | * | ||
1531 | * 2. If more than 3 TOUT_HSM or UNK_DEV errors occurred | ||
1532 | * during last 10 mins, NCQ_OFF. | ||
1533 | * | ||
1534 | * 3. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 10 | ||
1535 | * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN. | ||
1514 | * | 1536 | * |
1515 | * LOCKING: | 1537 | * LOCKING: |
1516 | * Inherited from caller. | 1538 | * Inherited from caller. |
@@ -1525,23 +1547,29 @@ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) | |||
1525 | struct speed_down_verdict_arg arg; | 1547 | struct speed_down_verdict_arg arg; |
1526 | unsigned int verdict = 0; | 1548 | unsigned int verdict = 0; |
1527 | 1549 | ||
1528 | /* scan past 10 mins of error history */ | 1550 | /* scan past 5 mins of error history */ |
1529 | memset(&arg, 0, sizeof(arg)); | 1551 | memset(&arg, 0, sizeof(arg)); |
1530 | arg.since = j64 - min(j64, j10mins); | 1552 | arg.since = j64 - min(j64, j5mins); |
1531 | ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); | 1553 | ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); |
1532 | 1554 | ||
1533 | if (arg.nr_errors[2] + arg.nr_errors[3] > 3) | 1555 | if (arg.nr_errors[ATA_ECAT_ATA_BUS] + |
1534 | verdict |= ATA_EH_SPDN_NCQ_OFF; | 1556 | arg.nr_errors[ATA_ECAT_TOUT_HSM] + |
1535 | if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10) | 1557 | arg.nr_errors[ATA_ECAT_UNK_DEV] > 10) |
1536 | verdict |= ATA_EH_SPDN_SPEED_DOWN; | 1558 | verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; |
1537 | 1559 | ||
1538 | /* scan past 3 mins of error history */ | 1560 | /* scan past 10 mins of error history */ |
1539 | memset(&arg, 0, sizeof(arg)); | 1561 | memset(&arg, 0, sizeof(arg)); |
1540 | arg.since = j64 - min(j64, j5mins); | 1562 | arg.since = j64 - min(j64, j10mins); |
1541 | ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); | 1563 | ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); |
1542 | 1564 | ||
1543 | if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10) | 1565 | if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + |
1544 | verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; | 1566 | arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) |
1567 | verdict |= ATA_EH_SPDN_NCQ_OFF; | ||
1568 | |||
1569 | if (arg.nr_errors[ATA_ECAT_ATA_BUS] + | ||
1570 | arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || | ||
1571 | arg.nr_errors[ATA_ECAT_UNK_DEV] > 10) | ||
1572 | verdict |= ATA_EH_SPDN_SPEED_DOWN; | ||
1545 | 1573 | ||
1546 | return verdict; | 1574 | return verdict; |
1547 | } | 1575 | } |
@@ -1549,7 +1577,7 @@ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) | |||
1549 | /** | 1577 | /** |
1550 | * ata_eh_speed_down - record error and speed down if necessary | 1578 | * ata_eh_speed_down - record error and speed down if necessary |
1551 | * @dev: Failed device | 1579 | * @dev: Failed device |
1552 | * @is_io: Did the device fail during normal IO? | 1580 | * @eflags: mask of ATA_EFLAG_* flags |
1553 | * @err_mask: err_mask of the error | 1581 | * @err_mask: err_mask of the error |
1554 | * | 1582 | * |
1555 | * Record error and examine error history to determine whether | 1583 | * Record error and examine error history to determine whether |
@@ -1563,18 +1591,19 @@ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) | |||
1563 | * RETURNS: | 1591 | * RETURNS: |
1564 | * Determined recovery action. | 1592 | * Determined recovery action. |
1565 | */ | 1593 | */ |
1566 | static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io, | 1594 | static unsigned int ata_eh_speed_down(struct ata_device *dev, |
1567 | unsigned int err_mask) | 1595 | unsigned int eflags, unsigned int err_mask) |
1568 | { | 1596 | { |
1597 | struct ata_link *link = dev->link; | ||
1569 | unsigned int verdict; | 1598 | unsigned int verdict; |
1570 | unsigned int action = 0; | 1599 | unsigned int action = 0; |
1571 | 1600 | ||
1572 | /* don't bother if Cat-0 error */ | 1601 | /* don't bother if Cat-0 error */ |
1573 | if (ata_eh_categorize_error(is_io, err_mask) == 0) | 1602 | if (ata_eh_categorize_error(eflags, err_mask) == 0) |
1574 | return 0; | 1603 | return 0; |
1575 | 1604 | ||
1576 | /* record error and determine whether speed down is necessary */ | 1605 | /* record error and determine whether speed down is necessary */ |
1577 | ata_ering_record(&dev->ering, is_io, err_mask); | 1606 | ata_ering_record(&dev->ering, eflags, err_mask); |
1578 | verdict = ata_eh_speed_down_verdict(dev); | 1607 | verdict = ata_eh_speed_down_verdict(dev); |
1579 | 1608 | ||
1580 | /* turn off NCQ? */ | 1609 | /* turn off NCQ? */ |
@@ -1590,7 +1619,7 @@ static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io, | |||
1590 | /* speed down? */ | 1619 | /* speed down? */ |
1591 | if (verdict & ATA_EH_SPDN_SPEED_DOWN) { | 1620 | if (verdict & ATA_EH_SPDN_SPEED_DOWN) { |
1592 | /* speed down SATA link speed if possible */ | 1621 | /* speed down SATA link speed if possible */ |
1593 | if (sata_down_spd_limit(dev->link) == 0) { | 1622 | if (sata_down_spd_limit(link) == 0) { |
1594 | action |= ATA_EH_HARDRESET; | 1623 | action |= ATA_EH_HARDRESET; |
1595 | goto done; | 1624 | goto done; |
1596 | } | 1625 | } |
@@ -1621,7 +1650,7 @@ static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io, | |||
1621 | * SATA. Consider it only for PATA. | 1650 | * SATA. Consider it only for PATA. |
1622 | */ | 1651 | */ |
1623 | if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && | 1652 | if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && |
1624 | (dev->link->ap->cbl != ATA_CBL_SATA) && | 1653 | (link->ap->cbl != ATA_CBL_SATA) && |
1625 | (dev->xfer_shift != ATA_SHIFT_PIO)) { | 1654 | (dev->xfer_shift != ATA_SHIFT_PIO)) { |
1626 | if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { | 1655 | if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { |
1627 | dev->spdn_cnt = 0; | 1656 | dev->spdn_cnt = 0; |
@@ -1653,8 +1682,8 @@ static void ata_eh_link_autopsy(struct ata_link *link) | |||
1653 | struct ata_port *ap = link->ap; | 1682 | struct ata_port *ap = link->ap; |
1654 | struct ata_eh_context *ehc = &link->eh_context; | 1683 | struct ata_eh_context *ehc = &link->eh_context; |
1655 | struct ata_device *dev; | 1684 | struct ata_device *dev; |
1656 | unsigned int all_err_mask = 0; | 1685 | unsigned int all_err_mask = 0, eflags = 0; |
1657 | int tag, is_io = 0; | 1686 | int tag; |
1658 | u32 serror; | 1687 | u32 serror; |
1659 | int rc; | 1688 | int rc; |
1660 | 1689 | ||
@@ -1713,15 +1742,15 @@ static void ata_eh_link_autopsy(struct ata_link *link) | |||
1713 | ehc->i.dev = qc->dev; | 1742 | ehc->i.dev = qc->dev; |
1714 | all_err_mask |= qc->err_mask; | 1743 | all_err_mask |= qc->err_mask; |
1715 | if (qc->flags & ATA_QCFLAG_IO) | 1744 | if (qc->flags & ATA_QCFLAG_IO) |
1716 | is_io = 1; | 1745 | eflags |= ATA_EFLAG_IS_IO; |
1717 | } | 1746 | } |
1718 | 1747 | ||
1719 | /* enforce default EH actions */ | 1748 | /* enforce default EH actions */ |
1720 | if (ap->pflags & ATA_PFLAG_FROZEN || | 1749 | if (ap->pflags & ATA_PFLAG_FROZEN || |
1721 | all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) | 1750 | all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) |
1722 | ehc->i.action |= ATA_EH_SOFTRESET; | 1751 | ehc->i.action |= ATA_EH_SOFTRESET; |
1723 | else if ((is_io && all_err_mask) || | 1752 | else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || |
1724 | (!is_io && (all_err_mask & ~AC_ERR_DEV))) | 1753 | (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) |
1725 | ehc->i.action |= ATA_EH_REVALIDATE; | 1754 | ehc->i.action |= ATA_EH_REVALIDATE; |
1726 | 1755 | ||
1727 | /* If we have offending qcs and the associated failed device, | 1756 | /* If we have offending qcs and the associated failed device, |
@@ -1744,7 +1773,7 @@ static void ata_eh_link_autopsy(struct ata_link *link) | |||
1744 | dev = link->device; | 1773 | dev = link->device; |
1745 | 1774 | ||
1746 | if (dev) | 1775 | if (dev) |
1747 | ehc->i.action |= ata_eh_speed_down(dev, is_io, all_err_mask); | 1776 | ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); |
1748 | 1777 | ||
1749 | DPRINTK("EXIT\n"); | 1778 | DPRINTK("EXIT\n"); |
1750 | } | 1779 | } |
diff --git a/include/linux/libata.h b/include/linux/libata.h index ca347b018649..74f1255e2524 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h | |||
@@ -482,7 +482,7 @@ struct ata_port_stats { | |||
482 | }; | 482 | }; |
483 | 483 | ||
484 | struct ata_ering_entry { | 484 | struct ata_ering_entry { |
485 | int is_io; | 485 | unsigned int eflags; |
486 | unsigned int err_mask; | 486 | unsigned int err_mask; |
487 | u64 timestamp; | 487 | u64 timestamp; |
488 | }; | 488 | }; |