aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/ipath/ipath_verbs.c
diff options
context:
space:
mode:
author Ralph Campbell <ralph.campbell@qlogic.com> 2007-07-24 16:55:39 -0400
committer Roland Dreier <rolandd@cisco.com> 2007-10-09 23:04:14 -0400
commit 210d6ca3db058cd1d6e6fd235ee3e25d6ac221cd (patch)
tree 3fb104498d7b4144a1da0ca9d4a2241560a96ef4 /drivers/infiniband/hw/ipath/ipath_verbs.c
parent 327a338d4fd018d33e7cacde46c0d82622b4bda8 (diff)
IB/ipath: Performance optimization for CPU differences
Different processors have different ordering restrictions for write combining. By taking advantage of this, we can eliminate some write barriers when writing to the send buffers.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_verbs.c')
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.c 62
1 files changed, 37 insertions, 25 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 16aa61fd8085..559d4a662937 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -631,7 +631,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
631#endif 631#endif
632 632
633static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, 633static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
634 u32 length) 634 u32 length, unsigned flush_wc)
635{ 635{
636 u32 extra = 0; 636 u32 extra = 0;
637 u32 data = 0; 637 u32 data = 0;
@@ -757,11 +757,14 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
757 } 757 }
758 /* Update address before sending packet. */ 758 /* Update address before sending packet. */
759 update_sge(ss, length); 759 update_sge(ss, length);
760 /* must flush early everything before trigger word */ 760 if (flush_wc) {
761 ipath_flush_wc(); 761 /* must flush early everything before trigger word */
762 __raw_writel(last, piobuf); 762 ipath_flush_wc();
763 /* be sure trigger word is written */ 763 __raw_writel(last, piobuf);
764 ipath_flush_wc(); 764 /* be sure trigger word is written */
765 ipath_flush_wc();
766 } else
767 __raw_writel(last, piobuf);
765} 768}
766 769
767/** 770/**
@@ -776,6 +779,7 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
776 u32 *hdr, u32 len, struct ipath_sge_state *ss) 779 u32 *hdr, u32 len, struct ipath_sge_state *ss)
777{ 780{
778 u32 __iomem *piobuf; 781 u32 __iomem *piobuf;
782 unsigned flush_wc;
779 u32 plen; 783 u32 plen;
780 int ret; 784 int ret;
781 785
@@ -799,47 +803,55 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
799 * or WC buffer can be written out of order. 803 * or WC buffer can be written out of order.
800 */ 804 */
801 writeq(plen, piobuf); 805 writeq(plen, piobuf);
802 ipath_flush_wc();
803 piobuf += 2; 806 piobuf += 2;
807
808 flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
804 if (len == 0) { 809 if (len == 0) {
805 /* 810 /*
806 * If there is just the header portion, must flush before 811 * If there is just the header portion, must flush before
807 * writing last word of header for correctness, and after 812 * writing last word of header for correctness, and after
808 * the last header word (trigger word). 813 * the last header word (trigger word).
809 */ 814 */
810 __iowrite32_copy(piobuf, hdr, hdrwords - 1); 815 if (flush_wc) {
811 ipath_flush_wc(); 816 ipath_flush_wc();
812 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); 817 __iowrite32_copy(piobuf, hdr, hdrwords - 1);
813 ipath_flush_wc(); 818 ipath_flush_wc();
814 ret = 0; 819 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
815 goto bail; 820 ipath_flush_wc();
821 } else
822 __iowrite32_copy(piobuf, hdr, hdrwords);
823 goto done;
816 } 824 }
817 825
826 if (flush_wc)
827 ipath_flush_wc();
818 __iowrite32_copy(piobuf, hdr, hdrwords); 828 __iowrite32_copy(piobuf, hdr, hdrwords);
819 piobuf += hdrwords; 829 piobuf += hdrwords;
820 830
821 /* The common case is aligned and contained in one segment. */ 831 /* The common case is aligned and contained in one segment. */
822 if (likely(ss->num_sge == 1 && len <= ss->sge.length && 832 if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
823 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { 833 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
824 u32 w; 834 u32 dwords;
825 u32 *addr = (u32 *) ss->sge.vaddr; 835 u32 *addr = (u32 *) ss->sge.vaddr;
826 836
827 /* Update address before sending packet. */ 837 /* Update address before sending packet. */
828 update_sge(ss, len); 838 update_sge(ss, len);
829 /* Need to round up for the last dword in the packet. */ 839 /* Need to round up for the last dword in the packet. */
830 w = (len + 3) >> 2; 840 dwords = (len + 3) >> 2;
831 __iowrite32_copy(piobuf, addr, w - 1); 841 if (flush_wc) {
832 /* must flush early everything before trigger word */ 842 __iowrite32_copy(piobuf, addr, dwords - 1);
833 ipath_flush_wc(); 843 /* must flush early everything before trigger word */
834 __raw_writel(addr[w - 1], piobuf + w - 1); 844 ipath_flush_wc();
835 /* be sure trigger word is written */ 845 __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
836 ipath_flush_wc(); 846 /* be sure trigger word is written */
837 ret = 0; 847 ipath_flush_wc();
838 goto bail; 848 } else
849 __iowrite32_copy(piobuf, addr, dwords);
850 goto done;
839 } 851 }
840 copy_io(piobuf, ss, len); 852 copy_io(piobuf, ss, len, flush_wc);
853done:
841 ret = 0; 854 ret = 0;
842
843bail: 855bail:
844 return ret; 856 return ret;
845} 857}