diff options
author | Ralph Campbell <ralph.campbell@qlogic.com> | 2007-07-24 16:55:39 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2007-10-09 23:04:14 -0400 |
commit | 210d6ca3db058cd1d6e6fd235ee3e25d6ac221cd (patch) | |
tree | 3fb104498d7b4144a1da0ca9d4a2241560a96ef4 /drivers/infiniband/hw/ipath/ipath_verbs.c | |
parent | 327a338d4fd018d33e7cacde46c0d82622b4bda8 (diff) |
IB/ipath: Performance optimization for CPU differences
Different processors have different ordering restrictions for write
combining. By taking advantage of this, we can eliminate some write
barriers when writing to the send buffers.
Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_verbs.c')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_verbs.c | 62 |
1 files changed, 37 insertions, 25 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 16aa61fd8085..559d4a662937 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c | |||
@@ -631,7 +631,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) | |||
631 | #endif | 631 | #endif |
632 | 632 | ||
633 | static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, | 633 | static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, |
634 | u32 length) | 634 | u32 length, unsigned flush_wc) |
635 | { | 635 | { |
636 | u32 extra = 0; | 636 | u32 extra = 0; |
637 | u32 data = 0; | 637 | u32 data = 0; |
@@ -757,11 +757,14 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, | |||
757 | } | 757 | } |
758 | /* Update address before sending packet. */ | 758 | /* Update address before sending packet. */ |
759 | update_sge(ss, length); | 759 | update_sge(ss, length); |
760 | /* must flush early everything before trigger word */ | 760 | if (flush_wc) { |
761 | ipath_flush_wc(); | 761 | /* must flush early everything before trigger word */ |
762 | __raw_writel(last, piobuf); | 762 | ipath_flush_wc(); |
763 | /* be sure trigger word is written */ | 763 | __raw_writel(last, piobuf); |
764 | ipath_flush_wc(); | 764 | /* be sure trigger word is written */ |
765 | ipath_flush_wc(); | ||
766 | } else | ||
767 | __raw_writel(last, piobuf); | ||
765 | } | 768 | } |
766 | 769 | ||
767 | /** | 770 | /** |
@@ -776,6 +779,7 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, | |||
776 | u32 *hdr, u32 len, struct ipath_sge_state *ss) | 779 | u32 *hdr, u32 len, struct ipath_sge_state *ss) |
777 | { | 780 | { |
778 | u32 __iomem *piobuf; | 781 | u32 __iomem *piobuf; |
782 | unsigned flush_wc; | ||
779 | u32 plen; | 783 | u32 plen; |
780 | int ret; | 784 | int ret; |
781 | 785 | ||
@@ -799,47 +803,55 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, | |||
799 | * or WC buffer can be written out of order. | 803 | * or WC buffer can be written out of order. |
800 | */ | 804 | */ |
801 | writeq(plen, piobuf); | 805 | writeq(plen, piobuf); |
802 | ipath_flush_wc(); | ||
803 | piobuf += 2; | 806 | piobuf += 2; |
807 | |||
808 | flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC; | ||
804 | if (len == 0) { | 809 | if (len == 0) { |
805 | /* | 810 | /* |
806 | * If there is just the header portion, must flush before | 811 | * If there is just the header portion, must flush before |
807 | * writing last word of header for correctness, and after | 812 | * writing last word of header for correctness, and after |
808 | * the last header word (trigger word). | 813 | * the last header word (trigger word). |
809 | */ | 814 | */ |
810 | __iowrite32_copy(piobuf, hdr, hdrwords - 1); | 815 | if (flush_wc) { |
811 | ipath_flush_wc(); | 816 | ipath_flush_wc(); |
812 | __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); | 817 | __iowrite32_copy(piobuf, hdr, hdrwords - 1); |
813 | ipath_flush_wc(); | 818 | ipath_flush_wc(); |
814 | ret = 0; | 819 | __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); |
815 | goto bail; | 820 | ipath_flush_wc(); |
821 | } else | ||
822 | __iowrite32_copy(piobuf, hdr, hdrwords); | ||
823 | goto done; | ||
816 | } | 824 | } |
817 | 825 | ||
826 | if (flush_wc) | ||
827 | ipath_flush_wc(); | ||
818 | __iowrite32_copy(piobuf, hdr, hdrwords); | 828 | __iowrite32_copy(piobuf, hdr, hdrwords); |
819 | piobuf += hdrwords; | 829 | piobuf += hdrwords; |
820 | 830 | ||
821 | /* The common case is aligned and contained in one segment. */ | 831 | /* The common case is aligned and contained in one segment. */ |
822 | if (likely(ss->num_sge == 1 && len <= ss->sge.length && | 832 | if (likely(ss->num_sge == 1 && len <= ss->sge.length && |
823 | !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { | 833 | !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { |
824 | u32 w; | 834 | u32 dwords; |
825 | u32 *addr = (u32 *) ss->sge.vaddr; | 835 | u32 *addr = (u32 *) ss->sge.vaddr; |
826 | 836 | ||
827 | /* Update address before sending packet. */ | 837 | /* Update address before sending packet. */ |
828 | update_sge(ss, len); | 838 | update_sge(ss, len); |
829 | /* Need to round up for the last dword in the packet. */ | 839 | /* Need to round up for the last dword in the packet. */ |
830 | w = (len + 3) >> 2; | 840 | dwords = (len + 3) >> 2; |
831 | __iowrite32_copy(piobuf, addr, w - 1); | 841 | if (flush_wc) { |
832 | /* must flush early everything before trigger word */ | 842 | __iowrite32_copy(piobuf, addr, dwords - 1); |
833 | ipath_flush_wc(); | 843 | /* must flush early everything before trigger word */ |
834 | __raw_writel(addr[w - 1], piobuf + w - 1); | 844 | ipath_flush_wc(); |
835 | /* be sure trigger word is written */ | 845 | __raw_writel(addr[dwords - 1], piobuf + dwords - 1); |
836 | ipath_flush_wc(); | 846 | /* be sure trigger word is written */ |
837 | ret = 0; | 847 | ipath_flush_wc(); |
838 | goto bail; | 848 | } else |
849 | __iowrite32_copy(piobuf, addr, dwords); | ||
850 | goto done; | ||
839 | } | 851 | } |
840 | copy_io(piobuf, ss, len); | 852 | copy_io(piobuf, ss, len, flush_wc); |
853 | done: | ||
841 | ret = 0; | 854 | ret = 0; |
842 | |||
843 | bail: | 855 | bail: |
844 | return ret; | 856 | return ret; |
845 | } | 857 | } |