aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author Ralph Campbell <ralph.campbell@qlogic.com> 2007-07-24 16:55:39 -0400
committer Roland Dreier <rolandd@cisco.com> 2007-10-09 23:04:14 -0400
commit 210d6ca3db058cd1d6e6fd235ee3e25d6ac221cd (patch)
tree 3fb104498d7b4144a1da0ca9d4a2241560a96ef4 /drivers/infiniband
parent 327a338d4fd018d33e7cacde46c0d82622b4bda8 (diff)
IB/ipath: Performance optimization for CPU differences
Different processors have different ordering restrictions for write combining. By taking advantage of this, we can eliminate some write barriers when writing to the send buffers.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_diag.c 22
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_iba6120.c 2
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_kernel.h 2
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.c 62
4 files changed, 53 insertions, 35 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index cf25cdab02f9..4137c7770f1b 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -446,19 +446,21 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
446 dd->ipath_unit, plen - 1, pbufn); 446 dd->ipath_unit, plen - 1, pbufn);
447 447
448 if (dp.pbc_wd == 0) 448 if (dp.pbc_wd == 0)
449 /* Legacy operation, use computed pbc_wd */
450 dp.pbc_wd = plen; 449 dp.pbc_wd = plen;
451
452 /* we have to flush after the PBC for correctness on some cpus
453 * or WC buffer can be written out of order */
454 writeq(dp.pbc_wd, piobuf); 450 writeq(dp.pbc_wd, piobuf);
455 ipath_flush_wc(); 451 /*
456 /* copy all by the trigger word, then flush, so it's written 452 * Copy all by the trigger word, then flush, so it's written
457 * to chip before trigger word, then write trigger word, then 453 * to chip before trigger word, then write trigger word, then
458 * flush again, so packet is sent. */ 454 * flush again, so packet is sent.
459 __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1); 455 */
460 ipath_flush_wc(); 456 if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
461 __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); 457 ipath_flush_wc();
458 __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
459 ipath_flush_wc();
460 __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
461 } else
462 __iowrite32_copy(piobuf + 2, tmpbuf, clen);
463
462 ipath_flush_wc(); 464 ipath_flush_wc();
463 465
464 ret = sizeof(dp); 466 ret = sizeof(dp);
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 5b6ac9a1a709..a324c6f7aeba 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -1273,6 +1273,8 @@ static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
1273static int ipath_pe_early_init(struct ipath_devdata *dd) 1273static int ipath_pe_early_init(struct ipath_devdata *dd)
1274{ 1274{
1275 dd->ipath_flags |= IPATH_4BYTE_TID; 1275 dd->ipath_flags |= IPATH_4BYTE_TID;
1276 if (ipath_unordered_wc())
1277 dd->ipath_flags |= IPATH_PIO_FLUSH_WC;
1276 1278
1277 /* 1279 /*
1278 * For openfabrics, we need to be able to handle an IB header of 1280 * For openfabrics, we need to be able to handle an IB header of
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 7a7966f7e4ff..d983f92b9bcb 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -724,6 +724,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
724#define IPATH_LINKACTIVE 0x200 724#define IPATH_LINKACTIVE 0x200
725 /* link current state is unknown */ 725 /* link current state is unknown */
726#define IPATH_LINKUNK 0x400 726#define IPATH_LINKUNK 0x400
727 /* Write combining flush needed for PIO */
728#define IPATH_PIO_FLUSH_WC 0x1000
727 /* no IB cable, or no device on IB cable */ 729 /* no IB cable, or no device on IB cable */
728#define IPATH_NOCABLE 0x4000 730#define IPATH_NOCABLE 0x4000
729 /* Supports port zero per packet receive interrupts via 731 /* Supports port zero per packet receive interrupts via
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 16aa61fd8085..559d4a662937 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -631,7 +631,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
631#endif 631#endif
632 632
633static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, 633static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
634 u32 length) 634 u32 length, unsigned flush_wc)
635{ 635{
636 u32 extra = 0; 636 u32 extra = 0;
637 u32 data = 0; 637 u32 data = 0;
@@ -757,11 +757,14 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
757 } 757 }
758 /* Update address before sending packet. */ 758 /* Update address before sending packet. */
759 update_sge(ss, length); 759 update_sge(ss, length);
760 /* must flush early everything before trigger word */ 760 if (flush_wc) {
761 ipath_flush_wc(); 761 /* must flush early everything before trigger word */
762 __raw_writel(last, piobuf); 762 ipath_flush_wc();
763 /* be sure trigger word is written */ 763 __raw_writel(last, piobuf);
764 ipath_flush_wc(); 764 /* be sure trigger word is written */
765 ipath_flush_wc();
766 } else
767 __raw_writel(last, piobuf);
765} 768}
766 769
767/** 770/**
@@ -776,6 +779,7 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
776 u32 *hdr, u32 len, struct ipath_sge_state *ss) 779 u32 *hdr, u32 len, struct ipath_sge_state *ss)
777{ 780{
778 u32 __iomem *piobuf; 781 u32 __iomem *piobuf;
782 unsigned flush_wc;
779 u32 plen; 783 u32 plen;
780 int ret; 784 int ret;
781 785
@@ -799,47 +803,55 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
799 * or WC buffer can be written out of order. 803 * or WC buffer can be written out of order.
800 */ 804 */
801 writeq(plen, piobuf); 805 writeq(plen, piobuf);
802 ipath_flush_wc();
803 piobuf += 2; 806 piobuf += 2;
807
808 flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
804 if (len == 0) { 809 if (len == 0) {
805 /* 810 /*
806 * If there is just the header portion, must flush before 811 * If there is just the header portion, must flush before
807 * writing last word of header for correctness, and after 812 * writing last word of header for correctness, and after
808 * the last header word (trigger word). 813 * the last header word (trigger word).
809 */ 814 */
810 __iowrite32_copy(piobuf, hdr, hdrwords - 1); 815 if (flush_wc) {
811 ipath_flush_wc(); 816 ipath_flush_wc();
812 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); 817 __iowrite32_copy(piobuf, hdr, hdrwords - 1);
813 ipath_flush_wc(); 818 ipath_flush_wc();
814 ret = 0; 819 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
815 goto bail; 820 ipath_flush_wc();
821 } else
822 __iowrite32_copy(piobuf, hdr, hdrwords);
823 goto done;
816 } 824 }
817 825
826 if (flush_wc)
827 ipath_flush_wc();
818 __iowrite32_copy(piobuf, hdr, hdrwords); 828 __iowrite32_copy(piobuf, hdr, hdrwords);
819 piobuf += hdrwords; 829 piobuf += hdrwords;
820 830
821 /* The common case is aligned and contained in one segment. */ 831 /* The common case is aligned and contained in one segment. */
822 if (likely(ss->num_sge == 1 && len <= ss->sge.length && 832 if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
823 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { 833 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
824 u32 w; 834 u32 dwords;
825 u32 *addr = (u32 *) ss->sge.vaddr; 835 u32 *addr = (u32 *) ss->sge.vaddr;
826 836
827 /* Update address before sending packet. */ 837 /* Update address before sending packet. */
828 update_sge(ss, len); 838 update_sge(ss, len);
829 /* Need to round up for the last dword in the packet. */ 839 /* Need to round up for the last dword in the packet. */
830 w = (len + 3) >> 2; 840 dwords = (len + 3) >> 2;
831 __iowrite32_copy(piobuf, addr, w - 1); 841 if (flush_wc) {
832 /* must flush early everything before trigger word */ 842 __iowrite32_copy(piobuf, addr, dwords - 1);
833 ipath_flush_wc(); 843 /* must flush early everything before trigger word */
834 __raw_writel(addr[w - 1], piobuf + w - 1); 844 ipath_flush_wc();
835 /* be sure trigger word is written */ 845 __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
836 ipath_flush_wc(); 846 /* be sure trigger word is written */
837 ret = 0; 847 ipath_flush_wc();
838 goto bail; 848 } else
849 __iowrite32_copy(piobuf, addr, dwords);
850 goto done;
839 } 851 }
840 copy_io(piobuf, ss, len); 852 copy_io(piobuf, ss, len, flush_wc);
853done:
841 ret = 0; 854 ret = 0;
842
843bail: 855bail:
844 return ret; 856 return ret;
845} 857}