diff options
author | Tejun Heo <htejun@gmail.com> | 2007-02-02 02:50:52 -0500 |
---|---|---|
committer | Jeff Garzik <jeff@garzik.org> | 2007-05-01 07:49:54 -0400 |
commit | 31daabda16063b64a99a526242add727601e43c3 (patch) | |
tree | 771e48219041e0a60f33f439c35324d0a3849224 /drivers | |
parent | b8cffc6ad8c000410186815b7bcc6b76ef1bbb13 (diff) |
libata: reimplement reset sequencing
libata previously depended upon waits in prereset to get resets after
hotplug right for both spin up and device ready wait. This was
necessary both for reliability and speed as reset was likely to fail if
initiated too early and each try usually took more than 30secs to
fail. Previous patches fixed the reliability part by fixing status
and SCR handling in resets. This patch remedies the speed part by
improving reset sequencing.
Prereset waiting timeout is adjusted to 10s because spinup wait is
replaced by reset sequencing and !BSY wait is not as important as
before. During boot or module loading where the drive is already
fully spun up, !BSY wait succeeds immediately, so 10s should be enough
in most cases. It matters after hotplugging or other error
conditions, but in those cases, !BSY wait in prereset simply can't be
relied upon due to the varied and weird behaviors ATA controllers and
devices show.
Reset is now driven by ata_eh_reset_timeouts[] table which contains
timeouts for each reset try. The first reset can be softreset but the
following ones are always hardreset if available. Each timeout
defines deadline for the reset try. If a reset try fails, reset is
retried with the next timeout till the end of the timeout table is
reached. If a reset try fails before the timeout with error, libata
waits till the deadline of the failed try before retrying.
IOW, the timeout table defines a timetable of reset tries such that the
n'th try always begins at least after the sum of all previous timeouts
has passed. The current timetable defines 4 tries and takes around 1
minute.
@0 : First try. This should succeed most of the time during boot.
@10 : 10s is enough to spin up most consumer harddrives. Give it
another shot.
@20 : 20s should spin up > 99% of working drives. This try has a 35s
timeout for retarded devices needing long idleness post reset.
@55 : Final try with 5s timeout just in case.
The above timetable is a trade-off between not annoying the device too
much with frequent resets and taking reasonable amount of time in most
cases. Some controllers may do better with shorter timeouts while
others may fare better with longer but we just can't rely upon LLD
writers to test each controller with wide variety of devices using
various scenarios. We need default behavior which reasonably fits
most cases.
I've tested the above timetable on a dozen SATA controllers and a few
PATA controllers with about a dozen different drives from all major
vendors and 4 different ODDs from three different vendors for both
boot and hotplug (if available) cases.
Boot probing is not affected unless the device is broken, in which
case the new code gives up on the port after a minute rather than five or
nine minutes. When hotplugging, most devices get detected on the
first or second try. Multi-platter drives with long spin up time
which sometimes took > 40 secs with the original code, now usually
come up during the second try and at the latest right after the third try
@20.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/ata/libata-core.c | 35 | ||||
-rw-r--r-- | drivers/ata/libata-eh.c | 58 |
2 files changed, 42 insertions, 51 deletions
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 12717fa5e888..a7950885d18e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c | |||
@@ -3304,35 +3304,6 @@ int sata_phy_resume(struct ata_port *ap, const unsigned long *params, | |||
3304 | return sata_phy_debounce(ap, params, deadline); | 3304 | return sata_phy_debounce(ap, params, deadline); |
3305 | } | 3305 | } |
3306 | 3306 | ||
3307 | static void ata_wait_spinup(struct ata_port *ap, unsigned long deadline) | ||
3308 | { | ||
3309 | struct ata_eh_context *ehc = &ap->eh_context; | ||
3310 | unsigned long end, secs; | ||
3311 | int rc; | ||
3312 | |||
3313 | /* first, debounce phy if SATA */ | ||
3314 | if (ap->cbl == ATA_CBL_SATA) { | ||
3315 | rc = sata_phy_debounce(ap, sata_deb_timing_hotplug, deadline); | ||
3316 | |||
3317 | /* if debounced successfully and offline, no need to wait */ | ||
3318 | if ((rc == 0 || rc == -EOPNOTSUPP) && ata_port_offline(ap)) | ||
3319 | return; | ||
3320 | } | ||
3321 | |||
3322 | /* okay, let's give the drive time to spin up */ | ||
3323 | end = ehc->i.hotplug_timestamp + ATA_SPINUP_WAIT * HZ / 1000; | ||
3324 | secs = ((end - jiffies) + HZ - 1) / HZ; | ||
3325 | |||
3326 | if (time_after(jiffies, end)) | ||
3327 | return; | ||
3328 | |||
3329 | if (secs > 5) | ||
3330 | ata_port_printk(ap, KERN_INFO, "waiting for device to spin up " | ||
3331 | "(%lu secs)\n", secs); | ||
3332 | |||
3333 | schedule_timeout_uninterruptible(end - jiffies); | ||
3334 | } | ||
3335 | |||
3336 | /** | 3307 | /** |
3337 | * ata_std_prereset - prepare for reset | 3308 | * ata_std_prereset - prepare for reset |
3338 | * @ap: ATA port to be reset | 3309 | * @ap: ATA port to be reset |
@@ -3356,15 +3327,11 @@ int ata_std_prereset(struct ata_port *ap, unsigned long deadline) | |||
3356 | const unsigned long *timing = sata_ehc_deb_timing(ehc); | 3327 | const unsigned long *timing = sata_ehc_deb_timing(ehc); |
3357 | int rc; | 3328 | int rc; |
3358 | 3329 | ||
3359 | /* handle link resume & hotplug spinup */ | 3330 | /* handle link resume */ |
3360 | if ((ehc->i.flags & ATA_EHI_RESUME_LINK) && | 3331 | if ((ehc->i.flags & ATA_EHI_RESUME_LINK) && |
3361 | (ap->flags & ATA_FLAG_HRST_TO_RESUME)) | 3332 | (ap->flags & ATA_FLAG_HRST_TO_RESUME)) |
3362 | ehc->i.action |= ATA_EH_HARDRESET; | 3333 | ehc->i.action |= ATA_EH_HARDRESET; |
3363 | 3334 | ||
3364 | if ((ehc->i.flags & ATA_EHI_HOTPLUGGED) && | ||
3365 | (ap->flags & ATA_FLAG_SKIP_D2H_BSY)) | ||
3366 | ata_wait_spinup(ap, deadline); | ||
3367 | |||
3368 | /* if we're about to do hardreset, nothing more to do */ | 3335 | /* if we're about to do hardreset, nothing more to do */ |
3369 | if (ehc->i.action & ATA_EH_HARDRESET) | 3336 | if (ehc->i.action & ATA_EH_HARDRESET) |
3370 | return 0; | 3337 | return 0; |
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index b3f7d3c8ae60..8256655ce7d9 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c | |||
@@ -50,6 +50,28 @@ enum { | |||
50 | ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), | 50 | ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), |
51 | }; | 51 | }; |
52 | 52 | ||
53 | /* Waiting in ->prereset can never be reliable. It's sometimes nice | ||
54 | * to wait there but it can't be depended upon; otherwise, we wouldn't | ||
55 | * be resetting. Just give it enough time for most drives to spin up. | ||
56 | */ | ||
57 | enum { | ||
58 | ATA_EH_PRERESET_TIMEOUT = 10 * HZ, | ||
59 | }; | ||
60 | |||
61 | /* The following table determines how we sequence resets. Each entry | ||
62 | * represents timeout for that try. The first try can be soft or | ||
63 | * hardreset. All others are hardreset if available. In most cases | ||
64 | * the first reset w/ 10sec timeout should succeed. Following entries | ||
65 | * are mostly for error handling, hotplug and retarded devices. | ||
66 | */ | ||
67 | static const unsigned long ata_eh_reset_timeouts[] = { | ||
68 | 10 * HZ, /* most drives spin up by 10sec */ | ||
69 | 10 * HZ, /* > 99% working drives spin up before 20sec */ | ||
70 | 35 * HZ, /* give > 30 secs of idleness for retarded devices */ | ||
71 | 5 * HZ, /* and sweet one last chance */ | ||
72 | /* > 1 min has elapsed, give up */ | ||
73 | }; | ||
74 | |||
53 | static void __ata_port_freeze(struct ata_port *ap); | 75 | static void __ata_port_freeze(struct ata_port *ap); |
54 | static void ata_eh_finish(struct ata_port *ap); | 76 | static void ata_eh_finish(struct ata_port *ap); |
55 | #ifdef CONFIG_PM | 77 | #ifdef CONFIG_PM |
@@ -1603,8 +1625,9 @@ static int ata_eh_reset(struct ata_port *ap, int classify, | |||
1603 | { | 1625 | { |
1604 | struct ata_eh_context *ehc = &ap->eh_context; | 1626 | struct ata_eh_context *ehc = &ap->eh_context; |
1605 | unsigned int *classes = ehc->classes; | 1627 | unsigned int *classes = ehc->classes; |
1606 | int tries = ATA_EH_RESET_TRIES; | ||
1607 | int verbose = !(ehc->i.flags & ATA_EHI_QUIET); | 1628 | int verbose = !(ehc->i.flags & ATA_EHI_QUIET); |
1629 | int try = 0; | ||
1630 | unsigned long deadline; | ||
1608 | unsigned int action; | 1631 | unsigned int action; |
1609 | ata_reset_fn_t reset; | 1632 | ata_reset_fn_t reset; |
1610 | int i, did_followup_srst, rc; | 1633 | int i, did_followup_srst, rc; |
@@ -1624,7 +1647,7 @@ static int ata_eh_reset(struct ata_port *ap, int classify, | |||
1624 | ehc->i.action |= ATA_EH_HARDRESET; | 1647 | ehc->i.action |= ATA_EH_HARDRESET; |
1625 | 1648 | ||
1626 | if (prereset) { | 1649 | if (prereset) { |
1627 | rc = prereset(ap, jiffies + 40 * HZ); | 1650 | rc = prereset(ap, jiffies + ATA_EH_PRERESET_TIMEOUT); |
1628 | if (rc) { | 1651 | if (rc) { |
1629 | if (rc == -ENOENT) { | 1652 | if (rc == -ENOENT) { |
1630 | ata_port_printk(ap, KERN_DEBUG, | 1653 | ata_port_printk(ap, KERN_DEBUG, |
@@ -1665,6 +1688,8 @@ static int ata_eh_reset(struct ata_port *ap, int classify, | |||
1665 | } | 1688 | } |
1666 | 1689 | ||
1667 | retry: | 1690 | retry: |
1691 | deadline = jiffies + ata_eh_reset_timeouts[try++]; | ||
1692 | |||
1668 | /* shut up during boot probing */ | 1693 | /* shut up during boot probing */ |
1669 | if (verbose) | 1694 | if (verbose) |
1670 | ata_port_printk(ap, KERN_INFO, "%s resetting port\n", | 1695 | ata_port_printk(ap, KERN_INFO, "%s resetting port\n", |
@@ -1676,7 +1701,7 @@ static int ata_eh_reset(struct ata_port *ap, int classify, | |||
1676 | else | 1701 | else |
1677 | ehc->i.flags |= ATA_EHI_DID_SOFTRESET; | 1702 | ehc->i.flags |= ATA_EHI_DID_SOFTRESET; |
1678 | 1703 | ||
1679 | rc = ata_do_reset(ap, reset, classes, jiffies + 40 * HZ); | 1704 | rc = ata_do_reset(ap, reset, classes, deadline); |
1680 | 1705 | ||
1681 | did_followup_srst = 0; | 1706 | did_followup_srst = 0; |
1682 | if (reset == hardreset && | 1707 | if (reset == hardreset && |
@@ -1693,7 +1718,7 @@ static int ata_eh_reset(struct ata_port *ap, int classify, | |||
1693 | } | 1718 | } |
1694 | 1719 | ||
1695 | ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK); | 1720 | ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK); |
1696 | rc = ata_do_reset(ap, reset, classes, jiffies + 40 * HZ); | 1721 | rc = ata_do_reset(ap, reset, classes, deadline); |
1697 | 1722 | ||
1698 | if (rc == 0 && classify && | 1723 | if (rc == 0 && classify && |
1699 | classes[0] == ATA_DEV_UNKNOWN) { | 1724 | classes[0] == ATA_DEV_UNKNOWN) { |
@@ -1703,22 +1728,21 @@ static int ata_eh_reset(struct ata_port *ap, int classify, | |||
1703 | } | 1728 | } |
1704 | } | 1729 | } |
1705 | 1730 | ||
1706 | if (rc && --tries) { | 1731 | if (rc && try < ARRAY_SIZE(ata_eh_reset_timeouts)) { |
1707 | const char *type; | 1732 | unsigned long now = jiffies; |
1708 | 1733 | ||
1709 | if (reset == softreset) { | 1734 | if (time_before(now, deadline)) { |
1710 | if (did_followup_srst) | 1735 | unsigned long delta = deadline - jiffies; |
1711 | type = "follow-up soft"; | ||
1712 | else | ||
1713 | type = "soft"; | ||
1714 | } else | ||
1715 | type = "hard"; | ||
1716 | 1736 | ||
1717 | ata_port_printk(ap, KERN_WARNING, | 1737 | ata_port_printk(ap, KERN_WARNING, "reset failed " |
1718 | "%sreset failed, retrying in 5 secs\n", type); | 1738 | "(errno=%d), retrying in %u secs\n", |
1719 | ssleep(5); | 1739 | rc, (jiffies_to_msecs(delta) + 999) / 1000); |
1740 | |||
1741 | schedule_timeout_uninterruptible(delta); | ||
1742 | } | ||
1720 | 1743 | ||
1721 | if (reset == hardreset) | 1744 | if (reset == hardreset && |
1745 | try == ARRAY_SIZE(ata_eh_reset_timeouts) - 1) | ||
1722 | sata_down_spd_limit(ap); | 1746 | sata_down_spd_limit(ap); |
1723 | if (hardreset) | 1747 | if (hardreset) |
1724 | reset = hardreset; | 1748 | reset = hardreset; |