aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Olof Johansson <olof@lixom.net> 2007-10-02 17:27:15 -0400
committer: David S. Miller <davem@sunset.davemloft.net> 2007-10-10 19:54:25 -0400
commit: ad5da10a64bdca1ed39b25946727a1ce2659f3d4 (patch)
tree: 471d87010666ee8af865c6e573515322c798e3ec
parent: 8dc121a4b620090e594945fd36f878836fc5a14a (diff)
pasemi_mac: further performance tweaks
pasemi_mac: further performance tweaks

Misc driver tweaks for pasemi_mac:

* Increase ring size (really needed mostly on 10G)
* Take out an unneeded barrier
* Move around a few prefetches and reorder a few calls
* Don't try to clean on full tx buffer, just let things take their course and stop the queue directly
* Avoid filling on the same line as the interface is working on to reduce cache line bouncing
* Avoid unneeded clearing of software state (and make the interface shutdown code handle it)
* Fix up some of the tx ring wrap logic.

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
-rw-r--r-- drivers/net/pasemi_mac.c 92
1 files changed, 47 insertions, 45 deletions
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index b3994f5d2d2b..4a451f8c6f4d 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -56,8 +56,8 @@
56 56
57 57
58/* Must be a power of two */ 58/* Must be a power of two */
59#define RX_RING_SIZE 512 59#define RX_RING_SIZE 4096
60#define TX_RING_SIZE 512 60#define TX_RING_SIZE 4096
61 61
62#define DEFAULT_MSG_ENABLE \ 62#define DEFAULT_MSG_ENABLE \
63 (NETIF_MSG_DRV | \ 63 (NETIF_MSG_DRV | \
@@ -336,8 +336,16 @@ static void pasemi_mac_free_tx_resources(struct net_device *dev)
336 struct pasemi_mac_buffer *info; 336 struct pasemi_mac_buffer *info;
337 dma_addr_t dmas[MAX_SKB_FRAGS+1]; 337 dma_addr_t dmas[MAX_SKB_FRAGS+1];
338 int freed; 338 int freed;
339 int start, limit;
339 340
340 for (i = 0; i < TX_RING_SIZE; i += freed) { 341 start = mac->tx->next_to_clean;
342 limit = mac->tx->next_to_fill;
343
344 /* Compensate for when fill has wrapped and clean has not */
345 if (start > limit)
346 limit += TX_RING_SIZE;
347
348 for (i = start; i < limit; i += freed) {
341 info = &TX_RING_INFO(mac, i+1); 349 info = &TX_RING_INFO(mac, i+1);
342 if (info->dma && info->skb) { 350 if (info->dma && info->skb) {
343 for (j = 0; j <= skb_shinfo(info->skb)->nr_frags; j++) 351 for (j = 0; j <= skb_shinfo(info->skb)->nr_frags; j++)
@@ -520,9 +528,6 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
520 n = mac->rx->next_to_clean; 528 n = mac->rx->next_to_clean;
521 529
522 for (count = limit; count; count--) { 530 for (count = limit; count; count--) {
523
524 rmb();
525
526 macrx = RX_RING(mac, n); 531 macrx = RX_RING(mac, n);
527 532
528 if ((macrx & XCT_MACRX_E) || 533 if ((macrx & XCT_MACRX_E) ||
@@ -550,14 +555,10 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
550 break; 555 break;
551 } 556 }
552 557
553 prefetchw(info);
554
555 skb = info->skb; 558 skb = info->skb;
556 prefetchw(skb);
557 info->dma = 0;
558 559
559 pci_unmap_single(mac->dma_pdev, dma, skb->len, 560 prefetch(skb);
560 PCI_DMA_FROMDEVICE); 561 prefetch(&skb->data_len);
561 562
562 len = (macrx & XCT_MACRX_LLEN_M) >> XCT_MACRX_LLEN_S; 563 len = (macrx & XCT_MACRX_LLEN_M) >> XCT_MACRX_LLEN_S;
563 564
@@ -576,10 +577,9 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
576 } else 577 } else
577 info->skb = NULL; 578 info->skb = NULL;
578 579
579 /* Need to zero it out since hardware doesn't, since the 580 pci_unmap_single(mac->dma_pdev, dma, len, PCI_DMA_FROMDEVICE);
580 * replenish loop uses it to tell when it's done. 581
581 */ 582 info->dma = 0;
582 RX_BUFF(mac, i) = 0;
583 583
584 skb_put(skb, len); 584 skb_put(skb, len);
585 585
@@ -599,6 +599,11 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
599 RX_RING(mac, n) = 0; 599 RX_RING(mac, n) = 0;
600 RX_RING(mac, n+1) = 0; 600 RX_RING(mac, n+1) = 0;
601 601
602 /* Need to zero it out since hardware doesn't, since the
603 * replenish loop uses it to tell when it's done.
604 */
605 RX_BUFF(mac, i) = 0;
606
602 n += 2; 607 n += 2;
603 } 608 }
604 609
@@ -621,27 +626,33 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
621static int pasemi_mac_clean_tx(struct pasemi_mac *mac) 626static int pasemi_mac_clean_tx(struct pasemi_mac *mac)
622{ 627{
623 int i, j; 628 int i, j;
624 struct pasemi_mac_buffer *info; 629 unsigned int start, descr_count, buf_count, batch_limit;
625 unsigned int start, descr_count, buf_count, limit; 630 unsigned int ring_limit;
626 unsigned int total_count; 631 unsigned int total_count;
627 unsigned long flags; 632 unsigned long flags;
628 struct sk_buff *skbs[TX_CLEAN_BATCHSIZE]; 633 struct sk_buff *skbs[TX_CLEAN_BATCHSIZE];
629 dma_addr_t dmas[TX_CLEAN_BATCHSIZE][MAX_SKB_FRAGS+1]; 634 dma_addr_t dmas[TX_CLEAN_BATCHSIZE][MAX_SKB_FRAGS+1];
630 635
631 total_count = 0; 636 total_count = 0;
632 limit = TX_CLEAN_BATCHSIZE; 637 batch_limit = TX_CLEAN_BATCHSIZE;
633restart: 638restart:
634 spin_lock_irqsave(&mac->tx->lock, flags); 639 spin_lock_irqsave(&mac->tx->lock, flags);
635 640
636 start = mac->tx->next_to_clean; 641 start = mac->tx->next_to_clean;
642 ring_limit = mac->tx->next_to_fill;
643
644 /* Compensate for when fill has wrapped but clean has not */
645 if (start > ring_limit)
646 ring_limit += TX_RING_SIZE;
637 647
638 buf_count = 0; 648 buf_count = 0;
639 descr_count = 0; 649 descr_count = 0;
640 650
641 for (i = start; 651 for (i = start;
642 descr_count < limit && i < mac->tx->next_to_fill; 652 descr_count < batch_limit && i < ring_limit;
643 i += buf_count) { 653 i += buf_count) {
644 u64 mactx = TX_RING(mac, i); 654 u64 mactx = TX_RING(mac, i);
655 struct sk_buff *skb;
645 656
646 if ((mactx & XCT_MACTX_E) || 657 if ((mactx & XCT_MACTX_E) ||
647 (*mac->tx_status & PAS_STATUS_ERROR)) 658 (*mac->tx_status & PAS_STATUS_ERROR))
@@ -651,19 +662,15 @@ restart:
651 /* Not yet transmitted */ 662 /* Not yet transmitted */
652 break; 663 break;
653 664
654 info = &TX_RING_INFO(mac, i+1); 665 skb = TX_RING_INFO(mac, i+1).skb;
655 skbs[descr_count] = info->skb; 666 skbs[descr_count] = skb;
656 667
657 buf_count = 2 + skb_shinfo(info->skb)->nr_frags; 668 buf_count = 2 + skb_shinfo(skb)->nr_frags;
658 for (j = 0; j <= skb_shinfo(info->skb)->nr_frags; j++) 669 for (j = 0; j <= skb_shinfo(skb)->nr_frags; j++)
659 dmas[descr_count][j] = TX_RING_INFO(mac, i+1+j).dma; 670 dmas[descr_count][j] = TX_RING_INFO(mac, i+1+j).dma;
660 671
661
662 info->dma = 0;
663 TX_RING(mac, i) = 0; 672 TX_RING(mac, i) = 0;
664 TX_RING(mac, i+1) = 0; 673 TX_RING(mac, i+1) = 0;
665 TX_RING_INFO(mac, i+1).skb = 0;
666 TX_RING_INFO(mac, i+1).dma = 0;
667 674
668 /* Since we always fill with an even number of entries, make 675 /* Since we always fill with an even number of entries, make
669 * sure we skip any unused one at the end as well. 676 * sure we skip any unused one at the end as well.
@@ -672,7 +679,7 @@ restart:
672 buf_count++; 679 buf_count++;
673 descr_count++; 680 descr_count++;
674 } 681 }
675 mac->tx->next_to_clean = i; 682 mac->tx->next_to_clean = i & (TX_RING_SIZE-1);
676 683
677 spin_unlock_irqrestore(&mac->tx->lock, flags); 684 spin_unlock_irqrestore(&mac->tx->lock, flags);
678 netif_wake_queue(mac->netdev); 685 netif_wake_queue(mac->netdev);
@@ -683,7 +690,7 @@ restart:
683 total_count += descr_count; 690 total_count += descr_count;
684 691
685 /* If the batch was full, try to clean more */ 692 /* If the batch was full, try to clean more */
686 if (descr_count == limit) 693 if (descr_count == batch_limit)
687 goto restart; 694 goto restart;
688 695
689 return total_count; 696 return total_count;
@@ -1106,19 +1113,14 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
1106 1113
1107 spin_lock_irqsave(&txring->lock, flags); 1114 spin_lock_irqsave(&txring->lock, flags);
1108 1115
1109 if (RING_AVAIL(txring) <= nfrags+3) { 1116 /* Avoid stepping on the same cache line that the DMA controller
1110 spin_unlock_irqrestore(&txring->lock, flags); 1117 * is currently about to send, so leave at least 8 words available.
1111 pasemi_mac_clean_tx(mac); 1118 * Total free space needed is mactx + fragments + 8
1112 pasemi_mac_restart_tx_intr(mac); 1119 */
1113 spin_lock_irqsave(&txring->lock, flags); 1120 if (RING_AVAIL(txring) < nfrags + 10) {
1114 1121 /* no room -- stop the queue and wait for tx intr */
1115 if (RING_AVAIL(txring) <= nfrags+3) { 1122 netif_stop_queue(dev);
1116 /* Still no room -- stop the queue and wait for tx 1123 goto out_err;
1117 * intr when there's room.
1118 */
1119 netif_stop_queue(dev);
1120 goto out_err;
1121 }
1122 } 1124 }
1123 1125
1124 TX_RING(mac, txring->next_to_fill) = mactx; 1126 TX_RING(mac, txring->next_to_fill) = mactx;
@@ -1137,8 +1139,8 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
1137 if (nfrags & 1) 1139 if (nfrags & 1)
1138 nfrags++; 1140 nfrags++;
1139 1141
1140 txring->next_to_fill += nfrags + 1; 1142 txring->next_to_fill = (txring->next_to_fill + nfrags + 1) &
1141 1143 (TX_RING_SIZE-1);
1142 1144
1143 dev->stats.tx_packets++; 1145 dev->stats.tx_packets++;
1144 dev->stats.tx_bytes += skb->len; 1146 dev->stats.tx_bytes += skb->len;