author		David S. Miller <davem@davemloft.net>	2014-06-12 14:02:08 -0400
committer	David S. Miller <davem@davemloft.net>	2014-06-12 14:02:08 -0400
commit		fba0e1a3cfcc1d61e593f97650e18931a2aa1fc8 (patch)
tree		51de696840abc67a2f5d5803f63212b2fd593481
parent		3993c4e159eba0e10c0628737736d6fcf97ab9ef (diff)
parent		79f339125ea316e910220e5f5b4ad30370f4de85 (diff)
Merge branch 'fec'
Fugang Duan says:

====================
net: fec: Enable Software TSO to improve the tx performance

Add SG and software TSO support for FEC. This feature improves outbound
throughput.

Tested on an imx6dl sabresd board, running iperf TCP tests shows an 82%
improvement compared with the driver without the SG & TSO patches:

$ ethtool -K eth0 sg on
$ ethtool -K eth0 tso on
[ 3] local 10.192.242.108 port 35388 connected with 10.192.242.167 port 5001
[ ID] Interval       Transfer     Bandwidth
[ 3]  0.0- 3.0 sec   181 MBytes   506 Mbits/sec
* cpu loading is 30%

$ ethtool -K eth0 sg off
$ ethtool -K eth0 tso off
[ 3] local 10.192.242.108 port 52618 connected with 10.192.242.167 port 5001
[ ID] Interval       Transfer     Bandwidth
[ 3]  0.0- 3.0 sec  99.5 MBytes   278 Mbits/sec

The FEC hardware supports IP header and TCP/UDP checksum offload and can
transfer one frame through multiple buffer descriptors, but it has no
hardware TSO. The imx6q/dl SoC FEC is limited by hardware bus bandwidth
at Gbps speeds (400 Mbps ~ 700 Mbps); the imx6sx SoC FEC has no such
bandwidth limitation.

This patch set only enables software TSO, done the same way as in the
mv643xx_eth driver.

Test result analysis:

imx6dl sabresd board: an 82% improvement. Since the imx6dl FEC hardware
has a bandwidth limitation, this performance with SW TSO is a milestone.

Additional test on an imx6sx sdb board (the same FEC IP; not yet
supported upstream because some of its patches are still in flight),
using an internal kernel tree:
- without the SW TSO patches: tx bandwidth 840 Mbps, cpu loading 100%.
- with the SW TSO patches: tx bandwidth 942 Mbps, cpu loading 65%.

So the patch set is a big improvement for imx6sx FEC performance as well.

V2:
* From Frank Li's suggestion: rename "fec_enet_txdesc_entry_free" to
  "fec_enet_get_free_txdesc_num".
* Summarizing David Laight's and Eric Dumazet's thoughts: change the RX
  BD entry number to 256.
* From ezequiel's suggestion: follow the latest TSO fixes from his
  solution to rework the queue stop/wake-up, and avoid unmapping the TSO
  header buffers.
* From Eric Dumazet's suggestion: copy only the unaligned part of the
  payload into the first descriptor instead of the whole headlen. This
  would add complexity to the driver, and imx6dl FEC DMA needs 16-byte
  alignment, but CPU loading is not a concern at 30% and the current
  performance is already good; later chips such as the imx6sx Gigabit
  FEC support byte-aligned DMA, so no copy is needed there. V2 therefore
  drops this suggestion. Thanks anyway to Eric for the response and idea.

V3:
* From David Laight's feedback: drop the RX BD entry number change from
  this SW TSO patch set. A separate patch will increase the RX BD entry
  number for the interrupt coalescing feature in a later patch set.

V4:
* From David Laight's feedback: remove the conditional in
  fec_enet_get_bd_index().

V5:
* Patch #4 update, from David Laight's feedback ("expect
  fec_enet_get_free_txdesc_num() to return one less than it does
  currently"): return the space available, 0..size-1, always leaving one
  free entry, the same convention as linux circ_buf.

Thanks to Eric and ezequiel for their help and ideas.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
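A note on the circ_buf convention adopted in V5: free descriptors are
computed from the distance between the clean pointer (dirty_tx) and the
producer pointer (cur_tx), and one ring entry is always left unused so a
full ring can be told apart from an empty one. Below is a minimal,
self-contained sketch of that arithmetic in plain C; the names mirror the
driver's fec_enet_get_free_txdesc_num() but it works on indices rather
than descriptor pointers, and it is an illustration, not the driver code:

    #include <assert.h>

    #define TX_RING_SIZE 512        /* must be a power of two */

    struct tx_ring {
            int cur_tx;             /* next entry the driver will fill */
            int dirty_tx;           /* next entry the driver will reclaim */
    };

    /* Returns free entries in the range 0..TX_RING_SIZE-1; one slot
     * always stays reserved, exactly like linux circ_buf. */
    static int get_free_txdesc_num(const struct tx_ring *r)
    {
            int entries = r->dirty_tx - r->cur_tx - 1;

            return entries > 0 ? entries : entries + TX_RING_SIZE;
    }

    int main(void)
    {
            struct tx_ring r = { .cur_tx = 0, .dirty_tx = 0 };

            /* Empty ring: size - 1 usable entries. */
            assert(get_free_txdesc_num(&r) == TX_RING_SIZE - 1);

            /* 100 descriptors queued, none reclaimed yet. */
            r.cur_tx = 100;
            assert(get_free_txdesc_num(&r) == TX_RING_SIZE - 101);

            return 0;
    }

The stop/wake policy in the patches builds on this count: the queue is
stopped once free entries drop to tx_stop_threshold (FEC_MAX_SKB_DESCS)
and woken again when reclaim in fec_enet_tx() brings the count back up to
tx_wake_threshold, as the hunks in fec_enet_init() below set up.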
-rw-r--r--	drivers/net/ethernet/freescale/fec.h	13
-rw-r--r--	drivers/net/ethernet/freescale/fec_main.c	530
2 files changed, 443 insertions, 100 deletions
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 3b8d6d19ff05..671d080105a7 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -221,7 +221,7 @@ struct bufdesc_ex {
 #define BD_ENET_TX_RCMASK ((ushort)0x003c)
 #define BD_ENET_TX_UN ((ushort)0x0002)
 #define BD_ENET_TX_CSL ((ushort)0x0001)
-#define BD_ENET_TX_STATS ((ushort)0x03ff) /* All status bits */
+#define BD_ENET_TX_STATS ((ushort)0x0fff) /* All status bits */
 
 /*enhanced buffer descriptor control/status used by Ethernet transmit*/
 #define BD_ENET_TX_INT 0x40000000
@@ -246,8 +246,8 @@ struct bufdesc_ex {
 #define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES)
 #define FEC_ENET_TX_FRSIZE 2048
 #define FEC_ENET_TX_FRPPG (PAGE_SIZE / FEC_ENET_TX_FRSIZE)
-#define TX_RING_SIZE 16 /* Must be power of two */
-#define TX_RING_MOD_MASK 15 /* for this to work */
+#define TX_RING_SIZE 512 /* Must be power of two */
+#define TX_RING_MOD_MASK 511 /* for this to work */
 
 #define BD_ENET_RX_INT 0x00800000
 #define BD_ENET_RX_PTP ((ushort)0x0400)
@@ -296,8 +296,15 @@ struct fec_enet_private {
 	/* The ring entries to be free()ed */
 	struct bufdesc *dirty_tx;
 
+	unsigned short bufdesc_size;
 	unsigned short tx_ring_size;
 	unsigned short rx_ring_size;
+	unsigned short tx_stop_threshold;
+	unsigned short tx_wake_threshold;
+
+	/* Software TSO */
+	char *tso_hdrs;
+	dma_addr_t tso_hdrs_dma;
 
 	struct platform_device *pdev;
 
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 802be17285b6..38d9d276ab8b 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -36,6 +36,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/ip.h>
+#include <net/tso.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
@@ -173,10 +174,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 #endif
 #endif /* CONFIG_M5272 */
 
-#if (((RX_RING_SIZE + TX_RING_SIZE) * 32) > PAGE_SIZE)
-#error "FEC: descriptor ring size constants too large"
-#endif
-
 /* Interrupt events/masks. */
 #define FEC_ENET_HBERR ((uint)0x80000000) /* Heartbeat error */
 #define FEC_ENET_BABR ((uint)0x40000000) /* Babbling receiver */
@@ -232,6 +229,15 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 #define FEC_PAUSE_FLAG_AUTONEG 0x1
 #define FEC_PAUSE_FLAG_ENABLE 0x2
 
+#define TSO_HEADER_SIZE 128
+/* Max number of allowed TCP segments for software TSO */
+#define FEC_MAX_TSO_SEGS 100
+#define FEC_MAX_SKB_DESCS (FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
+#define IS_TSO_HEADER(txq, addr) \
+	((addr >= txq->tso_hdrs_dma) && \
+	 (addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE))
+
 static int mii_cnt;
 
 static inline
@@ -287,6 +293,22 @@ struct bufdesc *fec_enet_get_prevdesc(struct bufdesc *bdp, struct fec_enet_priva
 	return (new_bd < base) ? (new_bd + ring_size) : new_bd;
 }
 
+static int fec_enet_get_bd_index(struct bufdesc *base, struct bufdesc *bdp,
+				 struct fec_enet_private *fep)
+{
+	return ((const char *)bdp - (const char *)base) / fep->bufdesc_size;
+}
+
+static int fec_enet_get_free_txdesc_num(struct fec_enet_private *fep)
+{
+	int entries;
+
+	entries = ((const char *)fep->dirty_tx -
+			(const char *)fep->cur_tx) / fep->bufdesc_size - 1;
+
+	return entries > 0 ? entries : entries + fep->tx_ring_size;
+}
+
 static void *swap_buffer(void *bufaddr, int len)
 {
 	int i;
@@ -308,33 +330,133 @@ fec_enet_clear_csum(struct sk_buff *skb, struct net_device *ndev)
 	if (unlikely(skb_cow_head(skb, 0)))
 		return -1;
 
+	ip_hdr(skb)->check = 0;
 	*(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) = 0;
 
 	return 0;
 }
 
-static netdev_tx_t
-fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static void
+fec_enet_submit_work(struct bufdesc *bdp, struct fec_enet_private *fep)
+{
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(fep->pdev);
+	struct bufdesc *bdp_pre;
+
+	bdp_pre = fec_enet_get_prevdesc(bdp, fep);
+	if ((id_entry->driver_data & FEC_QUIRK_ERR006358) &&
+	    !(bdp_pre->cbd_sc & BD_ENET_TX_READY)) {
+		fep->delay_work.trig_tx = true;
+		schedule_delayed_work(&(fep->delay_work.delay_work),
+					msecs_to_jiffies(1));
+	}
+}
+
+static int
+fec_enet_txq_submit_frag_skb(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	const struct platform_device_id *id_entry =
 				platform_get_device_id(fep->pdev);
-	struct bufdesc *bdp, *bdp_pre;
-	void *bufaddr;
-	unsigned short status;
+	struct bufdesc *bdp = fep->cur_tx;
+	struct bufdesc_ex *ebdp;
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	int frag, frag_len;
+	unsigned short status;
+	unsigned int estatus = 0;
+	skb_frag_t *this_frag;
 	unsigned int index;
+	void *bufaddr;
+	int i;
 
-	/* Fill in a Tx ring entry */
+	for (frag = 0; frag < nr_frags; frag++) {
+		this_frag = &skb_shinfo(skb)->frags[frag];
+		bdp = fec_enet_get_nextdesc(bdp, fep);
+		ebdp = (struct bufdesc_ex *)bdp;
+
+		status = bdp->cbd_sc;
+		status &= ~BD_ENET_TX_STATS;
+		status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+		frag_len = skb_shinfo(skb)->frags[frag].size;
+
+		/* Handle the last BD specially */
+		if (frag == nr_frags - 1) {
+			status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
+			if (fep->bufdesc_ex) {
+				estatus |= BD_ENET_TX_INT;
+				if (unlikely(skb_shinfo(skb)->tx_flags &
+					SKBTX_HW_TSTAMP && fep->hwts_tx_en))
+					estatus |= BD_ENET_TX_TS;
+			}
+		}
+
+		if (fep->bufdesc_ex) {
+			if (skb->ip_summed == CHECKSUM_PARTIAL)
+				estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+			ebdp->cbd_bdu = 0;
+			ebdp->cbd_esc = estatus;
+		}
+
+		bufaddr = page_address(this_frag->page.p) + this_frag->page_offset;
+
+		index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+		if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+			id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+			memcpy(fep->tx_bounce[index], bufaddr, frag_len);
+			bufaddr = fep->tx_bounce[index];
+
+			if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+				swap_buffer(bufaddr, frag_len);
+		}
+
+		bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
+						frag_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+			dev_kfree_skb_any(skb);
+			if (net_ratelimit())
+				netdev_err(ndev, "Tx DMA memory map failed\n");
+			goto dma_mapping_error;
+		}
+
+		bdp->cbd_datlen = frag_len;
+		bdp->cbd_sc = status;
+	}
+
+	fep->cur_tx = bdp;
+
+	return 0;
+
+dma_mapping_error:
 	bdp = fep->cur_tx;
+	for (i = 0; i < frag; i++) {
+		bdp = fec_enet_get_nextdesc(bdp, fep);
+		dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+				bdp->cbd_datlen, DMA_TO_DEVICE);
+	}
+	return NETDEV_TX_OK;
+}
 
-	status = bdp->cbd_sc;
+static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(fep->pdev);
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	struct bufdesc *bdp, *last_bdp;
+	void *bufaddr;
+	unsigned short status;
+	unsigned short buflen;
+	unsigned int estatus = 0;
+	unsigned int index;
+	int entries_free;
+	int ret;
 
-	if (status & BD_ENET_TX_READY) {
-		/* Ooops. All transmit buffers are full. Bail out.
-		 * This should not happen, since ndev->tbusy should be set.
-		 */
-		netdev_err(ndev, "tx queue full!\n");
-		return NETDEV_TX_BUSY;
+	entries_free = fec_enet_get_free_txdesc_num(fep);
+	if (entries_free < MAX_SKB_FRAGS + 1) {
+		dev_kfree_skb_any(skb);
+		if (net_ratelimit())
+			netdev_err(ndev, "NOT enough BD for SG!\n");
+		return NETDEV_TX_OK;
 	}
 
 	/* Protocol checksum off-load for TCP and UDP. */
@@ -343,102 +465,300 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		return NETDEV_TX_OK;
 	}
 
-	/* Clear all of the status flags */
+	/* Fill in a Tx ring entry */
+	bdp = fep->cur_tx;
+	status = bdp->cbd_sc;
 	status &= ~BD_ENET_TX_STATS;
 
 	/* Set buffer length and buffer pointer */
 	bufaddr = skb->data;
-	bdp->cbd_datlen = skb->len;
-
-	/*
-	 * On some FEC implementations data must be aligned on
-	 * 4-byte boundaries. Use bounce buffers to copy data
-	 * and get it aligned. Ugh.
-	 */
-	if (fep->bufdesc_ex)
-		index = (struct bufdesc_ex *)bdp -
-			(struct bufdesc_ex *)fep->tx_bd_base;
-	else
-		index = bdp - fep->tx_bd_base;
+	buflen = skb_headlen(skb);
 
-	if (((unsigned long) bufaddr) & FEC_ALIGNMENT) {
-		memcpy(fep->tx_bounce[index], skb->data, skb->len);
+	index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+	if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+		id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+		memcpy(fep->tx_bounce[index], skb->data, buflen);
 		bufaddr = fep->tx_bounce[index];
-	}
 
-	/*
-	 * Some design made an incorrect assumption on endian mode of
-	 * the system that it's running on. As the result, driver has to
-	 * swap every frame going to and coming from the controller.
-	 */
-	if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
-		swap_buffer(bufaddr, skb->len);
-
-	/* Save skb pointer */
-	fep->tx_skbuff[index] = skb;
+		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+			swap_buffer(bufaddr, buflen);
+	}
 
 	/* Push the data cache so the CPM does not get stale memory
 	 * data.
 	 */
 	bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
-					skb->len, DMA_TO_DEVICE);
+					buflen, DMA_TO_DEVICE);
 	if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
-		bdp->cbd_bufaddr = 0;
-		fep->tx_skbuff[index] = NULL;
 		dev_kfree_skb_any(skb);
 		if (net_ratelimit())
 			netdev_err(ndev, "Tx DMA memory map failed\n");
 		return NETDEV_TX_OK;
 	}
 
+	if (nr_frags) {
+		ret = fec_enet_txq_submit_frag_skb(skb, ndev);
+		if (ret)
+			return ret;
+	} else {
+		status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
+		if (fep->bufdesc_ex) {
+			estatus = BD_ENET_TX_INT;
+			if (unlikely(skb_shinfo(skb)->tx_flags &
+				SKBTX_HW_TSTAMP && fep->hwts_tx_en))
+				estatus |= BD_ENET_TX_TS;
+		}
+	}
+
 	if (fep->bufdesc_ex) {
 
 		struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-		ebdp->cbd_bdu = 0;
+
 		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-			fep->hwts_tx_en)) {
-			ebdp->cbd_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT);
+			fep->hwts_tx_en))
 			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-		} else {
-			ebdp->cbd_esc = BD_ENET_TX_INT;
 
-			/* Enable protocol checksum flags
-			 * We do not bother with the IP Checksum bits as they
-			 * are done by the kernel
-			 */
-			if (skb->ip_summed == CHECKSUM_PARTIAL)
-				ebdp->cbd_esc |= BD_ENET_TX_PINS;
-		}
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+
+		ebdp->cbd_bdu = 0;
+		ebdp->cbd_esc = estatus;
 	}
 
+	last_bdp = fep->cur_tx;
+	index = fec_enet_get_bd_index(fep->tx_bd_base, last_bdp, fep);
+	/* Save skb pointer */
+	fep->tx_skbuff[index] = skb;
+
+	bdp->cbd_datlen = buflen;
+
 	/* Send it on its way. Tell FEC it's ready, interrupt when done,
 	 * it's the last BD of the frame, and to put the CRC on the end.
 	 */
-	status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR
-			| BD_ENET_TX_LAST | BD_ENET_TX_TC);
+	status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
 	bdp->cbd_sc = status;
 
-	bdp_pre = fec_enet_get_prevdesc(bdp, fep);
-	if ((id_entry->driver_data & FEC_QUIRK_ERR006358) &&
-	    !(bdp_pre->cbd_sc & BD_ENET_TX_READY)) {
-		fep->delay_work.trig_tx = true;
-		schedule_delayed_work(&(fep->delay_work.delay_work),
-					msecs_to_jiffies(1));
-	}
+	fec_enet_submit_work(bdp, fep);
 
 	/* If this was the last BD in the ring, start at the beginning again. */
-	bdp = fec_enet_get_nextdesc(bdp, fep);
+	bdp = fec_enet_get_nextdesc(last_bdp, fep);
 
 	skb_tx_timestamp(skb);
 
 	fep->cur_tx = bdp;
 
-	if (fep->cur_tx == fep->dirty_tx)
-		netif_stop_queue(ndev);
+	/* Trigger transmission start */
+	writel(0, fep->hwp + FEC_X_DES_ACTIVE);
+
+	return 0;
+}
+
+static int
+fec_enet_txq_put_data_tso(struct sk_buff *skb, struct net_device *ndev,
+			struct bufdesc *bdp, int index, char *data,
+			int size, bool last_tcp, bool is_last)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(fep->pdev);
+	struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+	unsigned short status;
+	unsigned int estatus = 0;
+
+	status = bdp->cbd_sc;
+	status &= ~BD_ENET_TX_STATS;
+
+	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+	bdp->cbd_datlen = size;
+
+	if (((unsigned long) data) & FEC_ALIGNMENT ||
+		id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+		memcpy(fep->tx_bounce[index], data, size);
+		data = fep->tx_bounce[index];
+
+		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+			swap_buffer(data, size);
+	}
+
+	bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,
+					size, DMA_TO_DEVICE);
+	if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+		dev_kfree_skb_any(skb);
+		if (net_ratelimit())
+			netdev_err(ndev, "Tx DMA memory map failed\n");
+		return NETDEV_TX_BUSY;
+	}
+
+	if (fep->bufdesc_ex) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+		ebdp->cbd_bdu = 0;
+		ebdp->cbd_esc = estatus;
+	}
+
+	/* Handle the last BD specially */
+	if (last_tcp)
+		status |= (BD_ENET_TX_LAST | BD_ENET_TX_TC);
+	if (is_last) {
+		status |= BD_ENET_TX_INTR;
+		if (fep->bufdesc_ex)
+			ebdp->cbd_esc |= BD_ENET_TX_INT;
+	}
+
+	bdp->cbd_sc = status;
+
+	return 0;
+}
+
+static int
+fec_enet_txq_put_hdr_tso(struct sk_buff *skb, struct net_device *ndev,
+			struct bufdesc *bdp, int index)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(fep->pdev);
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+	void *bufaddr;
+	unsigned long dmabuf;
+	unsigned short status;
+	unsigned int estatus = 0;
+
+	status = bdp->cbd_sc;
+	status &= ~BD_ENET_TX_STATS;
+	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+
+	bufaddr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+	dmabuf = fep->tso_hdrs_dma + index * TSO_HEADER_SIZE;
+	if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+		id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+		memcpy(fep->tx_bounce[index], skb->data, hdr_len);
+		bufaddr = fep->tx_bounce[index];
+
+		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+			swap_buffer(bufaddr, hdr_len);
+
+		dmabuf = dma_map_single(&fep->pdev->dev, bufaddr,
+					hdr_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(&fep->pdev->dev, dmabuf)) {
+			dev_kfree_skb_any(skb);
+			if (net_ratelimit())
+				netdev_err(ndev, "Tx DMA memory map failed\n");
+			return NETDEV_TX_BUSY;
+		}
+	}
+
+	bdp->cbd_bufaddr = dmabuf;
+	bdp->cbd_datlen = hdr_len;
+
+	if (fep->bufdesc_ex) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+		ebdp->cbd_bdu = 0;
+		ebdp->cbd_esc = estatus;
+	}
+
+	bdp->cbd_sc = status;
+
+	return 0;
+}
+
+static int fec_enet_txq_submit_tso(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	int total_len, data_left;
+	struct bufdesc *bdp = fep->cur_tx;
+	struct tso_t tso;
+	unsigned int index = 0;
+	int ret;
+
+	if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep)) {
+		dev_kfree_skb_any(skb);
+		if (net_ratelimit())
+			netdev_err(ndev, "NOT enough BD for TSO!\n");
+		return NETDEV_TX_OK;
+	}
+
+	/* Protocol checksum off-load for TCP and UDP. */
+	if (fec_enet_clear_csum(skb, ndev)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* Initialize the TSO handler, and prepare the first payload */
+	tso_start(skb, &tso);
+
+	total_len = skb->len - hdr_len;
+	while (total_len > 0) {
+		char *hdr;
+
+		index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+		total_len -= data_left;
+
+		/* prepare packet headers: MAC + IP + TCP */
+		hdr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+		ret = fec_enet_txq_put_hdr_tso(skb, ndev, bdp, index);
+		if (ret)
+			goto err_release;
+
+		while (data_left > 0) {
+			int size;
+
+			size = min_t(int, tso.size, data_left);
+			bdp = fec_enet_get_nextdesc(bdp, fep);
+			index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+			ret = fec_enet_txq_put_data_tso(skb, ndev, bdp, index, tso.data,
+							size, size == data_left,
+							total_len == 0);
+			if (ret)
+				goto err_release;
+
+			data_left -= size;
+			tso_build_data(skb, &tso, size);
+		}
+
+		bdp = fec_enet_get_nextdesc(bdp, fep);
+	}
+
+	/* Save skb pointer */
+	fep->tx_skbuff[index] = skb;
+
+	fec_enet_submit_work(bdp, fep);
+
+	skb_tx_timestamp(skb);
+	fep->cur_tx = bdp;
 
 	/* Trigger transmission start */
 	writel(0, fep->hwp + FEC_X_DES_ACTIVE);
 
+	return 0;
+
+err_release:
+	/* TODO: Release all used data descriptors for TSO */
+	return ret;
+}
+
+static netdev_tx_t
+fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	int entries_free;
+	int ret;
+
+	if (skb_is_gso(skb))
+		ret = fec_enet_txq_submit_tso(skb, ndev);
+	else
+		ret = fec_enet_txq_submit_skb(skb, ndev);
+	if (ret)
+		return ret;
+
+	entries_free = fec_enet_get_free_txdesc_num(fep);
+	if (entries_free <= fep->tx_stop_threshold)
+		netif_stop_queue(ndev);
+
 	return NETDEV_TX_OK;
 }
 
@@ -757,6 +1077,7 @@ fec_enet_tx(struct net_device *ndev)
 	unsigned short status;
 	struct sk_buff *skb;
 	int index = 0;
+	int entries_free;
 
 	fep = netdev_priv(ndev);
 	bdp = fep->dirty_tx;
@@ -770,16 +1091,17 @@ fec_enet_tx(struct net_device *ndev)
 		if (bdp == fep->cur_tx)
 			break;
 
-		if (fep->bufdesc_ex)
-			index = (struct bufdesc_ex *)bdp -
-				(struct bufdesc_ex *)fep->tx_bd_base;
-		else
-			index = bdp - fep->tx_bd_base;
+		index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
 
 		skb = fep->tx_skbuff[index];
-		dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, skb->len,
-				DMA_TO_DEVICE);
+		if (!IS_TSO_HEADER(fep, bdp->cbd_bufaddr))
+			dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+					bdp->cbd_datlen, DMA_TO_DEVICE);
 		bdp->cbd_bufaddr = 0;
+		if (!skb) {
+			bdp = fec_enet_get_nextdesc(bdp, fep);
+			continue;
+		}
 
 		/* Check for errors. */
 		if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
@@ -798,7 +1120,7 @@ fec_enet_tx(struct net_device *ndev)
 			ndev->stats.tx_carrier_errors++;
 		} else {
 			ndev->stats.tx_packets++;
-			ndev->stats.tx_bytes += bdp->cbd_datlen;
+			ndev->stats.tx_bytes += skb->len;
 		}
 
 		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) &&
@@ -835,15 +1157,15 @@ fec_enet_tx(struct net_device *ndev)
 
 		/* Since we have freed up a buffer, the ring is no longer full
 		 */
-		if (fep->dirty_tx != fep->cur_tx) {
-			if (netif_queue_stopped(ndev))
+		if (netif_queue_stopped(ndev)) {
+			entries_free = fec_enet_get_free_txdesc_num(fep);
+			if (entries_free >= fep->tx_wake_threshold)
 				netif_wake_queue(ndev);
 		}
 	}
 	return;
 }
 
-
 /* During a receive, the cur_rx points to the current incoming buffer.
  * When we update through the ring, if the next incoming buffer has
  * not been given to the system, we just set the empty indicator,
@@ -921,11 +1243,7 @@ fec_enet_rx(struct net_device *ndev, int budget)
 		pkt_len = bdp->cbd_datlen;
 		ndev->stats.rx_bytes += pkt_len;
 
-		if (fep->bufdesc_ex)
-			index = (struct bufdesc_ex *)bdp -
-				(struct bufdesc_ex *)fep->rx_bd_base;
-		else
-			index = bdp - fep->rx_bd_base;
+		index = fec_enet_get_bd_index(fep->rx_bd_base, bdp, fep);
 		data = fep->rx_skbuff[index]->data;
 		dma_sync_single_for_cpu(&fep->pdev->dev, bdp->cbd_bufaddr,
 					FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE);
@@ -2039,13 +2357,35 @@ static int fec_enet_init(struct net_device *ndev)
 	const struct platform_device_id *id_entry =
 				platform_get_device_id(fep->pdev);
 	struct bufdesc *cbd_base;
+	int bd_size;
+
+	/* init the tx & rx ring size */
+	fep->tx_ring_size = TX_RING_SIZE;
+	fep->rx_ring_size = RX_RING_SIZE;
+
+	fep->tx_stop_threshold = FEC_MAX_SKB_DESCS;
+	fep->tx_wake_threshold = (fep->tx_ring_size - fep->tx_stop_threshold) / 2;
+
+	if (fep->bufdesc_ex)
+		fep->bufdesc_size = sizeof(struct bufdesc_ex);
+	else
+		fep->bufdesc_size = sizeof(struct bufdesc);
+	bd_size = (fep->tx_ring_size + fep->rx_ring_size) *
+			fep->bufdesc_size;
 
 	/* Allocate memory for buffer descriptors. */
-	cbd_base = dma_alloc_coherent(NULL, PAGE_SIZE, &fep->bd_dma,
+	cbd_base = dma_alloc_coherent(NULL, bd_size, &fep->bd_dma,
 				      GFP_KERNEL);
 	if (!cbd_base)
 		return -ENOMEM;
 
+	fep->tso_hdrs = dma_alloc_coherent(NULL, fep->tx_ring_size * TSO_HEADER_SIZE,
+					   &fep->tso_hdrs_dma, GFP_KERNEL);
+	if (!fep->tso_hdrs) {
+		dma_free_coherent(NULL, bd_size, cbd_base, fep->bd_dma);
+		return -ENOMEM;
+	}
+
 	memset(cbd_base, 0, PAGE_SIZE);
 
 	fep->netdev = ndev;
@@ -2055,10 +2395,6 @@ static int fec_enet_init(struct net_device *ndev)
 	/* make sure MAC we just acquired is programmed into the hw */
 	fec_set_mac_address(ndev, NULL);
 
-	/* init the tx & rx ring size */
-	fep->tx_ring_size = TX_RING_SIZE;
-	fep->rx_ring_size = RX_RING_SIZE;
-
 	/* Set receive and transmit descriptor base. */
 	fep->rx_bd_base = cbd_base;
 	if (fep->bufdesc_ex)
@@ -2075,21 +2411,21 @@ static int fec_enet_init(struct net_device *ndev)
 	writel(FEC_RX_DISABLED_IMASK, fep->hwp + FEC_IMASK);
 	netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, NAPI_POLL_WEIGHT);
 
-	if (id_entry->driver_data & FEC_QUIRK_HAS_VLAN) {
+	if (id_entry->driver_data & FEC_QUIRK_HAS_VLAN)
 		/* enable hw VLAN support */
 		ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;
-		ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
-	}
 
 	if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
+		ndev->gso_max_segs = FEC_MAX_TSO_SEGS;
+
 		/* enable hw accelerator */
 		ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-				| NETIF_F_RXCSUM);
-		ndev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-				| NETIF_F_RXCSUM);
+				| NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO);
 		fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
 	}
 
+	ndev->hw_features = ndev->features;
+
 	fec_restart(ndev, 0);
 
 	return 0;