aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tejun Heo <htejun@gmail.com> 2008-05-18 12:15:08 -0400
committer: Jeff Garzik <jgarzik@redhat.com> 2008-05-19 17:51:47 -0400
commit: f046519fc85a8fdf6a058b4ac9d897cdee6f3e52 (patch)
tree: f4ee1fc8edaeaa2fb0245b82925ef40243a9f26f
parent: dc98c32cbe80750ae2d9d9fbdae305d38f005de7 (diff)
libata: kill hotplug related race condition
Originally, the whole reset processing was done while the port was frozen, and SError was cleared during @postreset(). This had two race conditions:

1: Hotplug could occur after reset but before SError is cleared, and libata wouldn't know about it.

2: Hotplug could occur after the whole reset is complete but before the port is thawed. As all events are cleared on thaw, the hotplug event would be lost.

Commit ac371987a81c61c2efbd6931245cdcaf43baad89 kills the first race by clearing SError during link resume but before the link onlineness test. However, this doesn't fix race #2, and in some cases clearing SError after SRST is a good idea.

This patch solves this problem by cross-checking link onlineness with the classification result after SError is cleared and the port is thawed. Reset is retried if the link is online but all devices attached to the link are unknown. As all devices will be revalidated, this one-way check is enough to ensure that all devices are detected and revalidated reliably.

This, luckily, also fixes the cases where the host controller returns bogus status while the hard drive is spinning up after hotplug, making classification run before the device sends the first FIS and thus causing misdetection.

Low level drivers can bypass the logic by setting class explicitly to ATA_DEV_NONE if ever necessary (currently none requires this).

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
-rw-r--r-- drivers/ata/libata-core.c 21
-rw-r--r-- drivers/ata/libata-eh.c 52
2 files changed, 50 insertions, 23 deletions
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index c6c316fc8379..ffc689d9e972 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3490,22 +3490,11 @@ int sata_link_resume(struct ata_link *link, const unsigned long *params,
3490 if ((rc = sata_link_debounce(link, params, deadline))) 3490 if ((rc = sata_link_debounce(link, params, deadline)))
3491 return rc; 3491 return rc;
3492 3492
3493 /* Clear SError. PMP and some host PHYs require this to 3493 /* clear SError, some PHYs require this even for SRST to work */
3494 * operate and clearing should be done before checking PHY
3495 * online status to avoid race condition (hotplugging between
3496 * link resume and status check).
3497 */
3498 if (!(rc = sata_scr_read(link, SCR_ERROR, &serror))) 3494 if (!(rc = sata_scr_read(link, SCR_ERROR, &serror)))
3499 rc = sata_scr_write(link, SCR_ERROR, serror); 3495 rc = sata_scr_write(link, SCR_ERROR, serror);
3500 if (rc == 0 || rc == -EINVAL) {
3501 unsigned long flags;
3502 3496
3503 spin_lock_irqsave(link->ap->lock, flags); 3497 return rc != -EINVAL ? rc : 0;
3504 link->eh_info.serror = 0;
3505 spin_unlock_irqrestore(link->ap->lock, flags);
3506 rc = 0;
3507 }
3508 return rc;
3509} 3498}
3510 3499
3511/** 3500/**
@@ -3704,8 +3693,14 @@ int sata_std_hardreset(struct ata_link *link, unsigned int *class,
3704 */ 3693 */
3705void ata_std_postreset(struct ata_link *link, unsigned int *classes) 3694void ata_std_postreset(struct ata_link *link, unsigned int *classes)
3706{ 3695{
3696 u32 serror;
3697
3707 DPRINTK("ENTER\n"); 3698 DPRINTK("ENTER\n");
3708 3699
3700 /* reset complete, clear SError */
3701 if (!sata_scr_read(link, SCR_ERROR, &serror))
3702 sata_scr_write(link, SCR_ERROR, serror);
3703
3709 /* print link status */ 3704 /* print link status */
3710 sata_print_link_status(link); 3705 sata_print_link_status(link);
3711 3706
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 06a92c58a49d..751dad0138ae 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2047,19 +2047,11 @@ static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
2047 unsigned int *classes, unsigned long deadline) 2047 unsigned int *classes, unsigned long deadline)
2048{ 2048{
2049 struct ata_device *dev; 2049 struct ata_device *dev;
2050 int rc;
2051 2050
2052 ata_link_for_each_dev(dev, link) 2051 ata_link_for_each_dev(dev, link)
2053 classes[dev->devno] = ATA_DEV_UNKNOWN; 2052 classes[dev->devno] = ATA_DEV_UNKNOWN;
2054 2053
2055 rc = reset(link, classes, deadline); 2054 return reset(link, classes, deadline);
2056
2057 /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */
2058 ata_link_for_each_dev(dev, link)
2059 if (classes[dev->devno] == ATA_DEV_UNKNOWN)
2060 classes[dev->devno] = ATA_DEV_NONE;
2061
2062 return rc;
2063} 2055}
2064 2056
2065static int ata_eh_followup_srst_needed(struct ata_link *link, 2057static int ata_eh_followup_srst_needed(struct ata_link *link,
@@ -2096,7 +2088,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
2096 ata_reset_fn_t reset; 2088 ata_reset_fn_t reset;
2097 unsigned long flags; 2089 unsigned long flags;
2098 u32 sstatus; 2090 u32 sstatus;
2099 int rc; 2091 int nr_known, rc;
2100 2092
2101 /* 2093 /*
2102 * Prepare to reset 2094 * Prepare to reset
@@ -2245,9 +2237,49 @@ int ata_eh_reset(struct ata_link *link, int classify,
2245 if (ata_is_host_link(link)) 2237 if (ata_is_host_link(link))
2246 ata_eh_thaw_port(ap); 2238 ata_eh_thaw_port(ap);
2247 2239
2240 /* postreset() should clear hardware SError. Although SError
2241 * is cleared during link resume, clearing SError here is
2242 * necessary as some PHYs raise hotplug events after SRST.
2243 * This introduces race condition where hotplug occurs between
2244 * reset and here. This race is mediated by cross checking
2245 * link onlineness and classification result later.
2246 */
2248 if (postreset) 2247 if (postreset)
2249 postreset(link, classes); 2248 postreset(link, classes);
2250 2249
2250 /* clear cached SError */
2251 spin_lock_irqsave(link->ap->lock, flags);
2252 link->eh_info.serror = 0;
2253 spin_unlock_irqrestore(link->ap->lock, flags);
2254
2255 /* Make sure onlineness and classification result correspond.
2256 * Hotplug could have happened during reset and some
2257 * controllers fail to wait while a drive is spinning up after
2258 * being hotplugged causing misdetection. By cross checking
2259 * link onlineness and classification result, those conditions
2260 * can be reliably detected and retried.
2261 */
2262 nr_known = 0;
2263 ata_link_for_each_dev(dev, link) {
2264 /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */
2265 if (classes[dev->devno] == ATA_DEV_UNKNOWN)
2266 classes[dev->devno] = ATA_DEV_NONE;
2267 else
2268 nr_known++;
2269 }
2270
2271 if (classify && !nr_known && ata_link_online(link)) {
2272 if (try < max_tries) {
2273 ata_link_printk(link, KERN_WARNING, "link online but "
2274 "device misclassified, retrying\n");
2275 rc = -EAGAIN;
2276 goto fail;
2277 }
2278 ata_link_printk(link, KERN_WARNING,
2279 "link online but device misclassified, "
2280 "device detection might fail\n");
2281 }
2282
2251 /* reset successful, schedule revalidation */ 2283 /* reset successful, schedule revalidation */
2252 ata_eh_done(link, NULL, ATA_EH_RESET); 2284 ata_eh_done(link, NULL, ATA_EH_RESET);
2253 ehc->i.action |= ATA_EH_REVALIDATE; 2285 ehc->i.action |= ATA_EH_REVALIDATE;