aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMing Lei <ming.lei@canonical.com>2011-09-02 09:24:23 -0400
committerPaolo Pisati <paolo.pisati@canonical.com>2012-08-17 04:19:32 -0400
commit351274f7650fb438589045dd9c06a28384beadb5 (patch)
tree1be7c0b783bcc0fdddddffccbb4e431b557832ef
parentde91be04948dc269b06b25cffb5af798f1ea577b (diff)
usb: ehci: make HC see up-to-date qh/qtd descriptor ASAP
This patch introduces the helper ehci_sync_mem to flush qtd/qh into memory immediately on some ARM platforms, so that the HC can see the up-to-date qtd/qh descriptor ASAP. This patch fixes a performance bug on the ARM Cortex A9 dual core platform, which has been reported on quite a few ARM machines (OMAP4, Tegra 2, snowball...); see details at https://bugs.launchpad.net/bugs/709245. The patch has been tested OK on the OMAP4 panda A1 board, and the performance of 'dd' over usb mass storage can be increased from 4~5MB/sec to 14~16MB/sec after applying this patch. SRU Justification: Impact: - without the patch, 'dd' over usb mass storage is about 4~5MB/sec. Fix: - After applying the patch, 'dd' over usb mass storage is about 14~16MB/sec. BugLink: http://bugs.launchpad.net/bugs/709245 upstream discussion: https://patchwork.kernel.org/patch/1113332/ Signed-off-by: Ming Lei <ming.lei@canonical.com> Signed-off-by: Paolo Pisati <paolo.pisati@canonical.com>
-rw-r--r--drivers/usb/host/ehci-q.c18
-rw-r--r--drivers/usb/host/ehci.h17
2 files changed, 35 insertions, 0 deletions
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 2499b3bce36..9dd7c1c44f4 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -995,6 +995,12 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
995 head->qh_next.qh = qh; 995 head->qh_next.qh = qh;
996 head->hw->hw_next = dma; 996 head->hw->hw_next = dma;
997 997
998 /*
999 * flush qh descriptor into memory immediately,
1000 * see comments in qh_append_tds.
1001 */
1002 ehci_sync_mem();
1003
998 qh_get(qh); 1004 qh_get(qh);
999 qh->xacterrs = 0; 1005 qh->xacterrs = 0;
1000 qh->qh_state = QH_STATE_LINKED; 1006 qh->qh_state = QH_STATE_LINKED;
@@ -1082,6 +1088,18 @@ static struct ehci_qh *qh_append_tds (
1082 wmb (); 1088 wmb ();
1083 dummy->hw_token = token; 1089 dummy->hw_token = token;
1084 1090
1091 /*
1092 * Writing to dma coherent buffer on ARM may
1093 * be delayed to reach memory, so HC may not see
1094 * hw_token of dummy qtd in time, which can cause
1095 * the qtd transaction to be executed very late,
1096 * and degrade performance a lot. ehci_sync_mem
1097 * is added to flush 'token' immediately into
1098 * memory, so that ehci can execute the transaction
1099 * ASAP.
1100 */
1101 ehci_sync_mem();
1102
1085 urb->hcpriv = qh_get (qh); 1103 urb->hcpriv = qh_get (qh);
1086 } 1104 }
1087 } 1105 }
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index 3ffb27f472c..8281312f187 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -753,6 +753,23 @@ static inline unsigned ehci_read_frame_index(struct ehci_hcd *ehci)
753 753
754#endif 754#endif
755 755
756/*
757 * Writes to DMA-coherent memory on ARM may be held back in the L2
758 * write buffer, so provide a helper that issues mb() to flush them to
759 * memory immediately; it is called after updating EHCI qh/qtd
760 * descriptors. Without CONFIG_ARM_DMA_MEM_BUFFERABLE it is a no-op.
761 */
762#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
763static inline void ehci_sync_mem(void)
764{
765 mb();
766}
767#else
768static inline void ehci_sync_mem(void)
769{
770}
771#endif
772
756/*-------------------------------------------------------------------------*/ 773/*-------------------------------------------------------------------------*/
757 774
758#ifndef DEBUG 775#ifndef DEBUG