diff options
author | Ben Hutchings <bhutchings@solarflare.com> | 2011-02-22 12:26:10 -0500 |
---|---|---|
committer | Ben Hutchings <bhutchings@solarflare.com> | 2011-03-04 12:58:42 -0500 |
commit | 65f0b417dee94f779ce9b77102b7d73c93723b39 (patch) | |
tree | 390279203a8c73a986d15be5cc30f9bb2e95c1e8 | |
parent | 6d84b986b26bac1d4d678ff10c10a633bf53f834 (diff) |
sfc: Use write-combining to reduce TX latency
Based on work by Neil Turton <nturton@solarflare.com> and
Kieran Mansley <kmansley@solarflare.com>.
The BIU has now been verified to handle 3- and 4-dword writes within a
single 128-bit register correctly. This means we can enable write-
combining and only insert write barriers between writes to distinct
registers.
This has been observed to save about 0.5 µs when pushing a TX
descriptor to an empty TX queue.
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
-rw-r--r-- | drivers/net/sfc/efx.c | 4 |
-rw-r--r-- | drivers/net/sfc/io.h | 13 |
-rw-r--r-- | drivers/net/sfc/mcdi.c | 9 |
3 files changed, 16 insertions, 10 deletions
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index d563049859a8..b8bd936374f2 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -1104,8 +1104,8 @@ static int efx_init_io(struct efx_nic *efx) | |||
1104 | rc = -EIO; | 1104 | rc = -EIO; |
1105 | goto fail3; | 1105 | goto fail3; |
1106 | } | 1106 | } |
1107 | efx->membase = ioremap_nocache(efx->membase_phys, | 1107 | efx->membase = ioremap_wc(efx->membase_phys, |
1108 | efx->type->mem_map_size); | 1108 | efx->type->mem_map_size); |
1109 | if (!efx->membase) { | 1109 | if (!efx->membase) { |
1110 | netif_err(efx, probe, efx->net_dev, | 1110 | netif_err(efx, probe, efx->net_dev, |
1111 | "could not map memory BAR at %llx+%x\n", | 1111 | "could not map memory BAR at %llx+%x\n", |
diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h
index dc45110b2456..d9d8c2ef1074 100644
--- a/drivers/net/sfc/io.h
+++ b/drivers/net/sfc/io.h
@@ -48,9 +48,9 @@ | |||
48 | * replacing the low 96 bits with zero does not affect functionality. | 48 | * replacing the low 96 bits with zero does not affect functionality. |
49 | * - If the host writes to the last dword address of such a register | 49 | * - If the host writes to the last dword address of such a register |
50 | * (i.e. the high 32 bits) the underlying register will always be | 50 | * (i.e. the high 32 bits) the underlying register will always be |
51 | * written. If the collector does not hold values for the low 96 | 51 | * written. If the collector and the current write together do not |
52 | * bits of the register, they will be written as zero. Writing to | 52 | * provide values for all 128 bits of the register, the low 96 bits |
53 | * the last qword does not have this effect and must not be done. | 53 | * will be written as zero. |
54 | * - If the host writes to the address of any other part of such a | 54 | * - If the host writes to the address of any other part of such a |
55 | * register while the collector already holds values for some other | 55 | * register while the collector already holds values for some other |
56 | * register, the write is discarded and the collector maintains its | 56 | * register, the write is discarded and the collector maintains its |
@@ -103,6 +103,7 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value, | |||
103 | _efx_writed(efx, value->u32[2], reg + 8); | 103 | _efx_writed(efx, value->u32[2], reg + 8); |
104 | _efx_writed(efx, value->u32[3], reg + 12); | 104 | _efx_writed(efx, value->u32[3], reg + 12); |
105 | #endif | 105 | #endif |
106 | wmb(); | ||
106 | mmiowb(); | 107 | mmiowb(); |
107 | spin_unlock_irqrestore(&efx->biu_lock, flags); | 108 | spin_unlock_irqrestore(&efx->biu_lock, flags); |
108 | } | 109 | } |
@@ -125,6 +126,7 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase, | |||
125 | __raw_writel((__force u32)value->u32[0], membase + addr); | 126 | __raw_writel((__force u32)value->u32[0], membase + addr); |
126 | __raw_writel((__force u32)value->u32[1], membase + addr + 4); | 127 | __raw_writel((__force u32)value->u32[1], membase + addr + 4); |
127 | #endif | 128 | #endif |
129 | wmb(); | ||
128 | mmiowb(); | 130 | mmiowb(); |
129 | spin_unlock_irqrestore(&efx->biu_lock, flags); | 131 | spin_unlock_irqrestore(&efx->biu_lock, flags); |
130 | } | 132 | } |
@@ -139,6 +141,7 @@ static inline void efx_writed(struct efx_nic *efx, efx_dword_t *value, | |||
139 | 141 | ||
140 | /* No lock required */ | 142 | /* No lock required */ |
141 | _efx_writed(efx, value->u32[0], reg); | 143 | _efx_writed(efx, value->u32[0], reg); |
144 | wmb(); | ||
142 | } | 145 | } |
143 | 146 | ||
144 | /* Read a 128-bit CSR, locking as appropriate. */ | 147 | /* Read a 128-bit CSR, locking as appropriate. */ |
@@ -237,12 +240,14 @@ static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value, | |||
237 | 240 | ||
238 | #ifdef EFX_USE_QWORD_IO | 241 | #ifdef EFX_USE_QWORD_IO |
239 | _efx_writeq(efx, value->u64[0], reg + 0); | 242 | _efx_writeq(efx, value->u64[0], reg + 0); |
243 | _efx_writeq(efx, value->u64[1], reg + 8); | ||
240 | #else | 244 | #else |
241 | _efx_writed(efx, value->u32[0], reg + 0); | 245 | _efx_writed(efx, value->u32[0], reg + 0); |
242 | _efx_writed(efx, value->u32[1], reg + 4); | 246 | _efx_writed(efx, value->u32[1], reg + 4); |
243 | #endif | ||
244 | _efx_writed(efx, value->u32[2], reg + 8); | 247 | _efx_writed(efx, value->u32[2], reg + 8); |
245 | _efx_writed(efx, value->u32[3], reg + 12); | 248 | _efx_writed(efx, value->u32[3], reg + 12); |
249 | #endif | ||
250 | wmb(); | ||
246 | } | 251 | } |
247 | #define efx_writeo_page(efx, value, reg, page) \ | 252 | #define efx_writeo_page(efx, value, reg, page) \ |
248 | _efx_writeo_page(efx, value, \ | 253 | _efx_writeo_page(efx, value, \ |
diff --git a/drivers/net/sfc/mcdi.c b/drivers/net/sfc/mcdi.c
index 8bba8955f310..5e118f0d2479 100644
--- a/drivers/net/sfc/mcdi.c
+++ b/drivers/net/sfc/mcdi.c
@@ -94,14 +94,15 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd, | |||
94 | 94 | ||
95 | efx_writed(efx, &hdr, pdu); | 95 | efx_writed(efx, &hdr, pdu); |
96 | 96 | ||
97 | for (i = 0; i < inlen; i += 4) | 97 | for (i = 0; i < inlen; i += 4) { |
98 | _efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i); | 98 | _efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i); |
99 | 99 | /* use wmb() within loop to inhibit write combining */ | |
100 | /* Ensure the payload is written out before the header */ | 100 | wmb(); |
101 | wmb(); | 101 | } |
102 | 102 | ||
103 | /* ring the doorbell with a distinctive value */ | 103 | /* ring the doorbell with a distinctive value */ |
104 | _efx_writed(efx, (__force __le32) 0x45789abc, doorbell); | 104 | _efx_writed(efx, (__force __le32) 0x45789abc, doorbell); |
105 | wmb(); | ||
105 | } | 106 | } |
106 | 107 | ||
107 | static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen) | 108 | static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen) |