aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2010-03-22 23:24:08 -0400
committerJeff Garzik <jgarzik@redhat.com>2010-03-23 09:39:08 -0400
commit332ac7ff77cdc6a183d78ab129545d7b14a1d57c (patch)
treefae900efeabe1b0d9ebc170d14170d31f3b8cbf4
parent4f1deba435ef75380c1d06fda860c7a15ea16fdf (diff)
libata-sff: fix spurious IRQ handling
Commit 27943620cbd960f710a385ff4a538e14ed3f1922 introduced spurious IRQ handling but it has a race condition where valid completion can be lost while trying to clear spurious IRQ leading to occassional command timeouts. This patch improves SFF interrupt handler such that 1. Once BMDMA HSM is stopped, the condition is never considered spurious. As there's no way to resume stopped BMDMA HSM, if device status doesn't agree with BMDMA status, the only way out is aborting the command (otherwise, it will just end up timing out). 2. ap->ops->sff_check_status() can be safely called to clear spurious device IRQ as it atomically returns completion status but BMDMA IRQ status can't be cleared in safe way if command is in flight. After a spurious IRQ, call ap->ops->sff_irq_clear() only if the respective device is idle and retry completion if sff_check_status() indicates command completion. Please note that ata_piix uses bmdma_status for sff_irq_check() and #2 won't weaken spurious IRQ handling even with in-flight command because if bmdma_status indicates IRQ pending but device status is not on spurious check, the next IRQ handler invocation will abort the command due to #1. This fixes bko#15537. https://bugzilla.kernel.org/show_bug.cgi?id=15537 Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Andrew Benton <b3nton@gmail.com> Cc: Petr Uzel <petr.uzel@centrum.cz> Cc: Rafael J. Wysocki <rjw@sisk.pl> Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
-rw-r--r--drivers/ata/libata-sff.c43
1 files changed, 36 insertions, 7 deletions
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 561dec2481cb..277477251a86 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1667,6 +1667,7 @@ unsigned int ata_sff_host_intr(struct ata_port *ap,
1667{ 1667{
1668 struct ata_eh_info *ehi = &ap->link.eh_info; 1668 struct ata_eh_info *ehi = &ap->link.eh_info;
1669 u8 status, host_stat = 0; 1669 u8 status, host_stat = 0;
1670 bool bmdma_stopped = false;
1670 1671
1671 VPRINTK("ata%u: protocol %d task_state %d\n", 1672 VPRINTK("ata%u: protocol %d task_state %d\n",
1672 ap->print_id, qc->tf.protocol, ap->hsm_task_state); 1673 ap->print_id, qc->tf.protocol, ap->hsm_task_state);
@@ -1699,6 +1700,7 @@ unsigned int ata_sff_host_intr(struct ata_port *ap,
1699 1700
1700 /* before we do anything else, clear DMA-Start bit */ 1701 /* before we do anything else, clear DMA-Start bit */
1701 ap->ops->bmdma_stop(qc); 1702 ap->ops->bmdma_stop(qc);
1703 bmdma_stopped = true;
1702 1704
1703 if (unlikely(host_stat & ATA_DMA_ERR)) { 1705 if (unlikely(host_stat & ATA_DMA_ERR)) {
1704 /* error when transfering data to/from memory */ 1706 /* error when transfering data to/from memory */
@@ -1716,8 +1718,14 @@ unsigned int ata_sff_host_intr(struct ata_port *ap,
1716 1718
1717 /* check main status, clearing INTRQ if needed */ 1719 /* check main status, clearing INTRQ if needed */
1718 status = ata_sff_irq_status(ap); 1720 status = ata_sff_irq_status(ap);
1719 if (status & ATA_BUSY) 1721 if (status & ATA_BUSY) {
1720 goto idle_irq; 1722 if (bmdma_stopped) {
1723 /* BMDMA engine is already stopped, we're screwed */
1724 qc->err_mask |= AC_ERR_HSM;
1725 ap->hsm_task_state = HSM_ST_ERR;
1726 } else
1727 goto idle_irq;
1728 }
1721 1729
1722 /* ack bmdma irq events */ 1730 /* ack bmdma irq events */
1723 ap->ops->sff_irq_clear(ap); 1731 ap->ops->sff_irq_clear(ap);
@@ -1762,13 +1770,16 @@ EXPORT_SYMBOL_GPL(ata_sff_host_intr);
1762irqreturn_t ata_sff_interrupt(int irq, void *dev_instance) 1770irqreturn_t ata_sff_interrupt(int irq, void *dev_instance)
1763{ 1771{
1764 struct ata_host *host = dev_instance; 1772 struct ata_host *host = dev_instance;
1773 bool retried = false;
1765 unsigned int i; 1774 unsigned int i;
1766 unsigned int handled = 0, polling = 0; 1775 unsigned int handled, idle, polling;
1767 unsigned long flags; 1776 unsigned long flags;
1768 1777
1769 /* TODO: make _irqsave conditional on x86 PCI IDE legacy mode */ 1778 /* TODO: make _irqsave conditional on x86 PCI IDE legacy mode */
1770 spin_lock_irqsave(&host->lock, flags); 1779 spin_lock_irqsave(&host->lock, flags);
1771 1780
1781retry:
1782 handled = idle = polling = 0;
1772 for (i = 0; i < host->n_ports; i++) { 1783 for (i = 0; i < host->n_ports; i++) {
1773 struct ata_port *ap = host->ports[i]; 1784 struct ata_port *ap = host->ports[i];
1774 struct ata_queued_cmd *qc; 1785 struct ata_queued_cmd *qc;
@@ -1782,7 +1793,8 @@ irqreturn_t ata_sff_interrupt(int irq, void *dev_instance)
1782 handled |= ata_sff_host_intr(ap, qc); 1793 handled |= ata_sff_host_intr(ap, qc);
1783 else 1794 else
1784 polling |= 1 << i; 1795 polling |= 1 << i;
1785 } 1796 } else
1797 idle |= 1 << i;
1786 } 1798 }
1787 1799
1788 /* 1800 /*
@@ -1790,7 +1802,9 @@ irqreturn_t ata_sff_interrupt(int irq, void *dev_instance)
1790 * asserting IRQ line, nobody cared will ensue. Check IRQ 1802 * asserting IRQ line, nobody cared will ensue. Check IRQ
1791 * pending status if available and clear spurious IRQ. 1803 * pending status if available and clear spurious IRQ.
1792 */ 1804 */
1793 if (!handled) { 1805 if (!handled && !retried) {
1806 bool retry = false;
1807
1794 for (i = 0; i < host->n_ports; i++) { 1808 for (i = 0; i < host->n_ports; i++) {
1795 struct ata_port *ap = host->ports[i]; 1809 struct ata_port *ap = host->ports[i];
1796 1810
@@ -1805,8 +1819,23 @@ irqreturn_t ata_sff_interrupt(int irq, void *dev_instance)
1805 ata_port_printk(ap, KERN_INFO, 1819 ata_port_printk(ap, KERN_INFO,
1806 "clearing spurious IRQ\n"); 1820 "clearing spurious IRQ\n");
1807 1821
1808 ap->ops->sff_check_status(ap); 1822 if (idle & (1 << i)) {
1809 ap->ops->sff_irq_clear(ap); 1823 ap->ops->sff_check_status(ap);
1824 ap->ops->sff_irq_clear(ap);
1825 } else {
1826 /* clear INTRQ and check if BUSY cleared */
1827 if (!(ap->ops->sff_check_status(ap) & ATA_BUSY))
1828 retry |= true;
1829 /*
1830 * With command in flight, we can't do
1831 * sff_irq_clear() w/o racing with completion.
1832 */
1833 }
1834 }
1835
1836 if (retry) {
1837 retried = true;
1838 goto retry;
1810 } 1839 }
1811 } 1840 }
1812 1841