author     Felix Fietkau <nbd@openwrt.org>          2015-03-23 07:35:37 -0400
committer  David S. Miller <davem@davemloft.net>    2015-03-23 16:56:28 -0400
commit     9cde94506eacfcda570b6c304b8deae1a7191ee2
tree       49b92aa376335f2cdd7a60efa7ee2f055445c3a0  /drivers/net/ethernet/broadcom
parent     45c9b3c09490ef4a4ac19df75b5919849423f3e7
bgmac: implement scatter/gather support
Always use software checksumming, since the hardware does not have any
checksum offload support.
This significantly improves local TCP tx performance.
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
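The "software checksumming" mentioned above follows the usual pattern for NICs without checksum offload: the driver still advertises NETIF_F_IP_CSUM/NETIF_F_IPV6_CSUM, so the stack may hand over skbs with CHECKSUM_PARTIAL, and the transmit path then finishes the checksum on the CPU with skb_checksum_help() before the buffer is DMA-mapped. A minimal sketch of that pattern (the helper name bgmac_tx_csum_fallback is illustrative, not part of this patch):

    #include <linux/skbuff.h>

    /* Illustrative helper: if the stack deferred the checksum
     * (CHECKSUM_PARTIAL), compute it on the CPU before the skb is
     * handed to a DMA engine that cannot offload it.
     */
    static int bgmac_tx_csum_fallback(struct sk_buff *skb)
    {
            if (skb->ip_summed == CHECKSUM_PARTIAL)
                    return skb_checksum_help(skb);

            return 0;
    }

In bgmac_dma_tx_add() below the return value is not checked; the call simply resolves any pending checksum before the head of the skb is mapped with dma_map_single().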
Diffstat (limited to 'drivers/net/ethernet/broadcom')
-rw-r--r--  drivers/net/ethernet/broadcom/bgmac.c | 164
1 file changed, 121 insertions(+), 43 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index c7da37aef035..fa8f9e147c34 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct bgmac *bgmac,
         bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
 }

+static void
+bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
+                     int i, int len, u32 ctl0)
+{
+        struct bgmac_slot_info *slot;
+        struct bgmac_dma_desc *dma_desc;
+        u32 ctl1;
+
+        if (i == ring->num_slots - 1)
+                ctl0 |= BGMAC_DESC_CTL0_EOT;
+
+        ctl1 = len & BGMAC_DESC_CTL1_LEN;
+
+        slot = &ring->slots[i];
+        dma_desc = &ring->cpu_base[i];
+        dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+        dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+        dma_desc->ctl0 = cpu_to_le32(ctl0);
+        dma_desc->ctl1 = cpu_to_le32(ctl1);
+}
+
 static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
                                     struct bgmac_dma_ring *ring,
                                     struct sk_buff *skb)
 {
         struct device *dma_dev = bgmac->core->dma_dev;
         struct net_device *net_dev = bgmac->net_dev;
-        struct bgmac_dma_desc *dma_desc;
-        struct bgmac_slot_info *slot;
-        u32 ctl0, ctl1;
+        struct bgmac_slot_info *slot = &ring->slots[ring->end];
         int free_slots;
+        int nr_frags;
+        u32 flags;
+        int index = ring->end;
+        int i;

         if (skb->len > BGMAC_DESC_CTL1_LEN) {
                 bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
-                goto err_stop_drop;
+                goto err_drop;
         }

+        if (skb->ip_summed == CHECKSUM_PARTIAL)
+                skb_checksum_help(skb);
+
+        nr_frags = skb_shinfo(skb)->nr_frags;
+
         if (ring->start <= ring->end)
                 free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
         else
                 free_slots = ring->start - ring->end;
-        if (free_slots == 1) {
+
+        if (free_slots <= nr_frags + 1) {
                 bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
                 netif_stop_queue(net_dev);
                 return NETDEV_TX_BUSY;
         }

-        slot = &ring->slots[ring->end];
-        slot->skb = skb;
-        slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
+        slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
                                         DMA_TO_DEVICE);
-        if (dma_mapping_error(dma_dev, slot->dma_addr)) {
-                bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
-                          ring->mmio_base);
-                goto err_stop_drop;
-        }
+        if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+                goto err_dma_head;

-        ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
-        if (ring->end == ring->num_slots - 1)
-                ctl0 |= BGMAC_DESC_CTL0_EOT;
-        ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
+        flags = BGMAC_DESC_CTL0_SOF;
+        if (!nr_frags)
+                flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;

-        dma_desc = ring->cpu_base;
-        dma_desc += ring->end;
-        dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
-        dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
-        dma_desc->ctl0 = cpu_to_le32(ctl0);
-        dma_desc->ctl1 = cpu_to_le32(ctl1);
+        bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
+        flags = 0;
+
+        for (i = 0; i < nr_frags; i++) {
+                struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+                int len = skb_frag_size(frag);
+
+                index = (index + 1) % BGMAC_TX_RING_SLOTS;
+                slot = &ring->slots[index];
+                slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
+                                                  len, DMA_TO_DEVICE);
+                if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+                        goto err_dma;
+
+                if (i == nr_frags - 1)
+                        flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
+
+                bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
+        }
+
+        slot->skb = skb;

         netdev_sent_queue(net_dev, skb->len);

@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
         /* Increase ring->end to point empty slot. We tell hardware the first
          * slot it should *not* read.
          */
-        if (++ring->end >= BGMAC_TX_RING_SLOTS)
-                ring->end = 0;
+        ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
         bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
                     ring->index_base +
                     ring->end * sizeof(struct bgmac_dma_desc));

-        /* Always keep one slot free to allow detecting bugged calls. */
-        if (--free_slots == 1)
+        free_slots -= nr_frags + 1;
+        if (free_slots < 8)
                 netif_stop_queue(net_dev);

         return NETDEV_TX_OK;

-err_stop_drop:
-        netif_stop_queue(net_dev);
+err_dma:
+        dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
+                         DMA_TO_DEVICE);
+
+        while (i > 0) {
+                int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
+                struct bgmac_slot_info *slot = &ring->slots[index];
+                u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
+                int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+
+                dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
+        }
+
+err_dma_head:
+        bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+                  ring->mmio_base);
+
+err_drop:
         dev_kfree_skb(skb);
         return NETDEV_TX_OK;
 }
@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)

         while (ring->start != empty_slot) {
                 struct bgmac_slot_info *slot = &ring->slots[ring->start];
+                u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
+                int len = ctl1 & BGMAC_DESC_CTL1_LEN;

-                if (slot->skb) {
+                if (!slot->dma_addr) {
+                        bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+                                  ring->start, ring->end);
+                        goto next;
+                }
+
+                if (ctl1 & BGMAC_DESC_CTL0_SOF)
                         /* Unmap no longer used buffer */
-                        dma_unmap_single(dma_dev, slot->dma_addr,
-                                         slot->skb->len, DMA_TO_DEVICE);
-                        slot->dma_addr = 0;
+                        dma_unmap_single(dma_dev, slot->dma_addr, len,
+                                         DMA_TO_DEVICE);
+                else
+                        dma_unmap_page(dma_dev, slot->dma_addr, len,
+                                       DMA_TO_DEVICE);

+                if (slot->skb) {
                         bytes_compl += slot->skb->len;
                         pkts_compl++;

                         /* Free memory! :) */
                         dev_kfree_skb(slot->skb);
                         slot->skb = NULL;
-                } else {
-                        bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
-                                  ring->start, ring->end);
                 }

+next:
+                slot->dma_addr = 0;
                 if (++ring->start >= BGMAC_TX_RING_SLOTS)
                         ring->start = 0;
                 freed = true;
         }

+        if (!pkts_compl)
+                return;
+
         netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);

-        if (freed && netif_queue_stopped(bgmac->net_dev))
+        if (netif_queue_stopped(bgmac->net_dev))
                 netif_wake_queue(bgmac->net_dev);
 }

@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
                                    struct bgmac_dma_ring *ring)
 {
         struct device *dma_dev = bgmac->core->dma_dev;
+        struct bgmac_dma_desc *dma_desc = ring->cpu_base;
         struct bgmac_slot_info *slot;
         int i;

         for (i = 0; i < ring->num_slots; i++) {
+                int len = dma_desc[i].ctl1 & BGMAC_DESC_CTL1_LEN;
+
                 slot = &ring->slots[i];
-                if (slot->skb) {
-                        if (slot->dma_addr)
-                                dma_unmap_single(dma_dev, slot->dma_addr,
-                                                 slot->skb->len, DMA_TO_DEVICE);
-                        dev_kfree_skb(slot->skb);
-                }
+                dev_kfree_skb(slot->skb);
+
+                if (!slot->dma_addr)
+                        continue;
+
+                if (slot->skb)
+                        dma_unmap_single(dma_dev, slot->dma_addr,
+                                         len, DMA_TO_DEVICE);
+                else
+                        dma_unmap_page(dma_dev, slot->dma_addr,
+                                       len, DMA_TO_DEVICE);
         }
 }

@@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_device *core)
                 goto err_dma_free;
         }

+        net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+        net_dev->hw_features = net_dev->features;
+        net_dev->vlan_features = net_dev->features;
+
         err = register_netdev(bgmac->net_dev);
         if (err) {
                 bgmac_err(bgmac, "Cannot register net device\n");