aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ben Hutchings <bhutchings@solarflare.com> 2011-02-22 12:26:10 -0500
committer Ben Hutchings <bhutchings@solarflare.com> 2011-03-04 12:58:42 -0500
commit 65f0b417dee94f779ce9b77102b7d73c93723b39 (patch)
tree 390279203a8c73a986d15be5cc30f9bb2e95c1e8
parent 6d84b986b26bac1d4d678ff10c10a633bf53f834 (diff)
sfc: Use write-combining to reduce TX latency
Based on work by Neil Turton <nturton@solarflare.com> and Kieran Mansley <kmansley@solarflare.com>. The BIU has now been verified to handle 3- and 4-dword writes within a single 128-bit register correctly. This means we can enable write-combining and only insert write barriers between writes to distinct registers. This has been observed to save about 0.5 us when pushing a TX descriptor to an empty TX queue. Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
-rw-r--r-- drivers/net/sfc/efx.c | 4
-rw-r--r-- drivers/net/sfc/io.h | 13
-rw-r--r-- drivers/net/sfc/mcdi.c | 9
3 files changed, 16 insertions, 10 deletions
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index d563049859a8..b8bd936374f2 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -1104,8 +1104,8 @@ static int efx_init_io(struct efx_nic *efx)
1104 rc = -EIO; 1104 rc = -EIO;
1105 goto fail3; 1105 goto fail3;
1106 } 1106 }
1107 efx->membase = ioremap_nocache(efx->membase_phys, 1107 efx->membase = ioremap_wc(efx->membase_phys,
1108 efx->type->mem_map_size); 1108 efx->type->mem_map_size);
1109 if (!efx->membase) { 1109 if (!efx->membase) {
1110 netif_err(efx, probe, efx->net_dev, 1110 netif_err(efx, probe, efx->net_dev,
1111 "could not map memory BAR at %llx+%x\n", 1111 "could not map memory BAR at %llx+%x\n",
diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h
index dc45110b2456..d9d8c2ef1074 100644
--- a/drivers/net/sfc/io.h
+++ b/drivers/net/sfc/io.h
@@ -48,9 +48,9 @@
48 * replacing the low 96 bits with zero does not affect functionality. 48 * replacing the low 96 bits with zero does not affect functionality.
49 * - If the host writes to the last dword address of such a register 49 * - If the host writes to the last dword address of such a register
50 * (i.e. the high 32 bits) the underlying register will always be 50 * (i.e. the high 32 bits) the underlying register will always be
51 * written. If the collector does not hold values for the low 96 51 * written. If the collector and the current write together do not
52 * bits of the register, they will be written as zero. Writing to 52 * provide values for all 128 bits of the register, the low 96 bits
53 * the last qword does not have this effect and must not be done. 53 * will be written as zero.
54 * - If the host writes to the address of any other part of such a 54 * - If the host writes to the address of any other part of such a
55 * register while the collector already holds values for some other 55 * register while the collector already holds values for some other
56 * register, the write is discarded and the collector maintains its 56 * register, the write is discarded and the collector maintains its
@@ -103,6 +103,7 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value,
103 _efx_writed(efx, value->u32[2], reg + 8); 103 _efx_writed(efx, value->u32[2], reg + 8);
104 _efx_writed(efx, value->u32[3], reg + 12); 104 _efx_writed(efx, value->u32[3], reg + 12);
105#endif 105#endif
106 wmb();
106 mmiowb(); 107 mmiowb();
107 spin_unlock_irqrestore(&efx->biu_lock, flags); 108 spin_unlock_irqrestore(&efx->biu_lock, flags);
108} 109}
@@ -125,6 +126,7 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase,
125 __raw_writel((__force u32)value->u32[0], membase + addr); 126 __raw_writel((__force u32)value->u32[0], membase + addr);
126 __raw_writel((__force u32)value->u32[1], membase + addr + 4); 127 __raw_writel((__force u32)value->u32[1], membase + addr + 4);
127#endif 128#endif
129 wmb();
128 mmiowb(); 130 mmiowb();
129 spin_unlock_irqrestore(&efx->biu_lock, flags); 131 spin_unlock_irqrestore(&efx->biu_lock, flags);
130} 132}
@@ -139,6 +141,7 @@ static inline void efx_writed(struct efx_nic *efx, efx_dword_t *value,
139 141
140 /* No lock required */ 142 /* No lock required */
141 _efx_writed(efx, value->u32[0], reg); 143 _efx_writed(efx, value->u32[0], reg);
144 wmb();
142} 145}
143 146
144/* Read a 128-bit CSR, locking as appropriate. */ 147/* Read a 128-bit CSR, locking as appropriate. */
@@ -237,12 +240,14 @@ static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value,
237 240
238#ifdef EFX_USE_QWORD_IO 241#ifdef EFX_USE_QWORD_IO
239 _efx_writeq(efx, value->u64[0], reg + 0); 242 _efx_writeq(efx, value->u64[0], reg + 0);
243 _efx_writeq(efx, value->u64[1], reg + 8);
240#else 244#else
241 _efx_writed(efx, value->u32[0], reg + 0); 245 _efx_writed(efx, value->u32[0], reg + 0);
242 _efx_writed(efx, value->u32[1], reg + 4); 246 _efx_writed(efx, value->u32[1], reg + 4);
243#endif
244 _efx_writed(efx, value->u32[2], reg + 8); 247 _efx_writed(efx, value->u32[2], reg + 8);
245 _efx_writed(efx, value->u32[3], reg + 12); 248 _efx_writed(efx, value->u32[3], reg + 12);
249#endif
250 wmb();
246} 251}
247#define efx_writeo_page(efx, value, reg, page) \ 252#define efx_writeo_page(efx, value, reg, page) \
248 _efx_writeo_page(efx, value, \ 253 _efx_writeo_page(efx, value, \
diff --git a/drivers/net/sfc/mcdi.c b/drivers/net/sfc/mcdi.c
index 8bba8955f310..5e118f0d2479 100644
--- a/drivers/net/sfc/mcdi.c
+++ b/drivers/net/sfc/mcdi.c
@@ -94,14 +94,15 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd,
94 94
95 efx_writed(efx, &hdr, pdu); 95 efx_writed(efx, &hdr, pdu);
96 96
97 for (i = 0; i < inlen; i += 4) 97 for (i = 0; i < inlen; i += 4) {
98 _efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i); 98 _efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i);
99 99 /* use wmb() within loop to inhibit write combining */
100 /* Ensure the payload is written out before the header */ 100 wmb();
101 wmb(); 101 }
102 102
103 /* ring the doorbell with a distinctive value */ 103 /* ring the doorbell with a distinctive value */
104 _efx_writed(efx, (__force __le32) 0x45789abc, doorbell); 104 _efx_writed(efx, (__force __le32) 0x45789abc, doorbell);
105 wmb();
105} 106}
106 107
107static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen) 108static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen)