aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author Olof Johansson <olof@lixom.net> 2007-11-28 21:56:41 -0500
committer David S. Miller <davem@davemloft.net> 2008-01-28 18:04:22 -0500
commit 5c15332bed4c59fff6423f08ef6bd6894af38a99 (patch)
tree c8969c43b540b4315e234fe351c325adcd5a7673 /drivers
parent 34c20624ce541f8a7ff937f474af51f9044cedd7 (diff)
pasemi_mac: performance tweaks
pasemi_mac: performance tweaks

* Seems like we do better with a smaller RX ring, probably because chances of still having the SKB cached are better
* Const-ify variables to get better code generation and fewer reloads
* Move prefetching around a little, and try to prefetch the whole SKB
* Set NETIF_F_HIGHDMA
* Misc other minor tweaks

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/net/pasemi_mac.c 114
1 files changed, 68 insertions, 46 deletions
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index a50eb34ece5d..c6e24a8dcf72 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -56,7 +56,7 @@
56 56
57 57
58/* Must be a power of two */ 58/* Must be a power of two */
59#define RX_RING_SIZE 4096 59#define RX_RING_SIZE 1024
60#define TX_RING_SIZE 4096 60#define TX_RING_SIZE 4096
61 61
62#define DEFAULT_MSG_ENABLE \ 62#define DEFAULT_MSG_ENABLE \
@@ -103,12 +103,12 @@ static void write_iob_reg(unsigned int reg, unsigned int val)
103 pasemi_write_iob_reg(reg, val); 103 pasemi_write_iob_reg(reg, val);
104} 104}
105 105
106static unsigned int read_mac_reg(struct pasemi_mac *mac, unsigned int reg) 106static unsigned int read_mac_reg(const struct pasemi_mac *mac, unsigned int reg)
107{ 107{
108 return pasemi_read_mac_reg(mac->dma_if, reg); 108 return pasemi_read_mac_reg(mac->dma_if, reg);
109} 109}
110 110
111static void write_mac_reg(struct pasemi_mac *mac, unsigned int reg, 111static void write_mac_reg(const struct pasemi_mac *mac, unsigned int reg,
112 unsigned int val) 112 unsigned int val)
113{ 113{
114 pasemi_write_mac_reg(mac->dma_if, reg, val); 114 pasemi_write_mac_reg(mac->dma_if, reg, val);
@@ -124,16 +124,26 @@ static void write_dma_reg(unsigned int reg, unsigned int val)
124 pasemi_write_dma_reg(reg, val); 124 pasemi_write_dma_reg(reg, val);
125} 125}
126 126
127static struct pasemi_mac_rxring *rx_ring(struct pasemi_mac *mac) 127static struct pasemi_mac_rxring *rx_ring(const struct pasemi_mac *mac)
128{ 128{
129 return mac->rx; 129 return mac->rx;
130} 130}
131 131
132static struct pasemi_mac_txring *tx_ring(struct pasemi_mac *mac) 132static struct pasemi_mac_txring *tx_ring(const struct pasemi_mac *mac)
133{ 133{
134 return mac->tx; 134 return mac->tx;
135} 135}
136 136
137static inline void prefetch_skb(const struct sk_buff *skb)
138{
139 const void *d = skb;
140
141 prefetch(d);
142 prefetch(d+64);
143 prefetch(d+128);
144 prefetch(d+192);
145}
146
137static int mac_to_intf(struct pasemi_mac *mac) 147static int mac_to_intf(struct pasemi_mac *mac)
138{ 148{
139 struct pci_dev *pdev = mac->pdev; 149 struct pci_dev *pdev = mac->pdev;
@@ -211,19 +221,18 @@ static int pasemi_get_mac_addr(struct pasemi_mac *mac)
211 221
212static int pasemi_mac_unmap_tx_skb(struct pasemi_mac *mac, 222static int pasemi_mac_unmap_tx_skb(struct pasemi_mac *mac,
213 struct sk_buff *skb, 223 struct sk_buff *skb,
214 dma_addr_t *dmas) 224 const dma_addr_t *dmas)
215{ 225{
216 int f; 226 int f;
217 int nfrags = skb_shinfo(skb)->nr_frags; 227 int nfrags = skb_shinfo(skb)->nr_frags;
228 struct pci_dev *pdev = mac->dma_pdev;
218 229
219 pci_unmap_single(mac->dma_pdev, dmas[0], skb_headlen(skb), 230 pci_unmap_single(pdev, dmas[0], skb_headlen(skb), PCI_DMA_TODEVICE);
220 PCI_DMA_TODEVICE);
221 231
222 for (f = 0; f < nfrags; f++) { 232 for (f = 0; f < nfrags; f++) {
223 skb_frag_t *frag = &skb_shinfo(skb)->frags[f]; 233 skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
224 234
225 pci_unmap_page(mac->dma_pdev, dmas[f+1], frag->size, 235 pci_unmap_page(pdev, dmas[f+1], frag->size, PCI_DMA_TODEVICE);
226 PCI_DMA_TODEVICE);
227 } 236 }
228 dev_kfree_skb_irq(skb); 237 dev_kfree_skb_irq(skb);
229 238
@@ -233,7 +242,7 @@ static int pasemi_mac_unmap_tx_skb(struct pasemi_mac *mac,
233 return (nfrags + 3) & ~1; 242 return (nfrags + 3) & ~1;
234} 243}
235 244
236static int pasemi_mac_setup_rx_resources(struct net_device *dev) 245static int pasemi_mac_setup_rx_resources(const struct net_device *dev)
237{ 246{
238 struct pasemi_mac_rxring *ring; 247 struct pasemi_mac_rxring *ring;
239 struct pasemi_mac *mac = netdev_priv(dev); 248 struct pasemi_mac *mac = netdev_priv(dev);
@@ -277,7 +286,7 @@ static int pasemi_mac_setup_rx_resources(struct net_device *dev)
277 PAS_DMA_RXCHAN_BASEU_BRBH(ring->chan.ring_dma >> 32) | 286 PAS_DMA_RXCHAN_BASEU_BRBH(ring->chan.ring_dma >> 32) |
278 PAS_DMA_RXCHAN_BASEU_SIZ(RX_RING_SIZE >> 3)); 287 PAS_DMA_RXCHAN_BASEU_SIZ(RX_RING_SIZE >> 3));
279 288
280 cfg = PAS_DMA_RXCHAN_CFG_HBU(1); 289 cfg = PAS_DMA_RXCHAN_CFG_HBU(2);
281 290
282 if (translation_enabled()) 291 if (translation_enabled())
283 cfg |= PAS_DMA_RXCHAN_CFG_CTR; 292 cfg |= PAS_DMA_RXCHAN_CFG_CTR;
@@ -291,7 +300,7 @@ static int pasemi_mac_setup_rx_resources(struct net_device *dev)
291 PAS_DMA_RXINT_BASEU_BRBH(ring->buf_dma >> 32) | 300 PAS_DMA_RXINT_BASEU_BRBH(ring->buf_dma >> 32) |
292 PAS_DMA_RXINT_BASEU_SIZ(RX_RING_SIZE >> 3)); 301 PAS_DMA_RXINT_BASEU_SIZ(RX_RING_SIZE >> 3));
293 302
294 cfg = PAS_DMA_RXINT_CFG_DHL(1) | PAS_DMA_RXINT_CFG_L2 | 303 cfg = PAS_DMA_RXINT_CFG_DHL(2) | PAS_DMA_RXINT_CFG_L2 |
295 PAS_DMA_RXINT_CFG_LW | PAS_DMA_RXINT_CFG_RBP | 304 PAS_DMA_RXINT_CFG_LW | PAS_DMA_RXINT_CFG_RBP |
296 PAS_DMA_RXINT_CFG_HEN; 305 PAS_DMA_RXINT_CFG_HEN;
297 306
@@ -316,7 +325,7 @@ out_chan:
316} 325}
317 326
318static struct pasemi_mac_txring * 327static struct pasemi_mac_txring *
319pasemi_mac_setup_tx_resources(struct net_device *dev) 328pasemi_mac_setup_tx_resources(const struct net_device *dev)
320{ 329{
321 struct pasemi_mac *mac = netdev_priv(dev); 330 struct pasemi_mac *mac = netdev_priv(dev);
322 u32 val; 331 u32 val;
@@ -439,9 +448,10 @@ static void pasemi_mac_free_rx_resources(struct pasemi_mac *mac)
439 mac->rx = NULL; 448 mac->rx = NULL;
440} 449}
441 450
442static void pasemi_mac_replenish_rx_ring(struct net_device *dev, int limit) 451static void pasemi_mac_replenish_rx_ring(const struct net_device *dev,
452 const int limit)
443{ 453{
444 struct pasemi_mac *mac = netdev_priv(dev); 454 const struct pasemi_mac *mac = netdev_priv(dev);
445 struct pasemi_mac_rxring *rx = rx_ring(mac); 455 struct pasemi_mac_rxring *rx = rx_ring(mac);
446 int fill, count; 456 int fill, count;
447 457
@@ -492,7 +502,7 @@ static void pasemi_mac_replenish_rx_ring(struct net_device *dev, int limit)
492 (RX_RING_SIZE - 1); 502 (RX_RING_SIZE - 1);
493} 503}
494 504
495static void pasemi_mac_restart_rx_intr(struct pasemi_mac *mac) 505static void pasemi_mac_restart_rx_intr(const struct pasemi_mac *mac)
496{ 506{
497 unsigned int reg, pcnt; 507 unsigned int reg, pcnt;
498 /* Re-enable packet count interrupts: finally 508 /* Re-enable packet count interrupts: finally
@@ -506,7 +516,7 @@ static void pasemi_mac_restart_rx_intr(struct pasemi_mac *mac)
506 write_iob_reg(PAS_IOB_DMA_RXCH_RESET(mac->rx->chan.chno), reg); 516 write_iob_reg(PAS_IOB_DMA_RXCH_RESET(mac->rx->chan.chno), reg);
507} 517}
508 518
509static void pasemi_mac_restart_tx_intr(struct pasemi_mac *mac) 519static void pasemi_mac_restart_tx_intr(const struct pasemi_mac *mac)
510{ 520{
511 unsigned int reg, pcnt; 521 unsigned int reg, pcnt;
512 522
@@ -519,7 +529,8 @@ static void pasemi_mac_restart_tx_intr(struct pasemi_mac *mac)
519} 529}
520 530
521 531
522static inline void pasemi_mac_rx_error(struct pasemi_mac *mac, u64 macrx) 532static inline void pasemi_mac_rx_error(const struct pasemi_mac *mac,
533 const u64 macrx)
523{ 534{
524 unsigned int rcmdsta, ccmdsta; 535 unsigned int rcmdsta, ccmdsta;
525 struct pasemi_dmachan *chan = &rx_ring(mac)->chan; 536 struct pasemi_dmachan *chan = &rx_ring(mac)->chan;
@@ -537,7 +548,8 @@ static inline void pasemi_mac_rx_error(struct pasemi_mac *mac, u64 macrx)
537 rcmdsta, ccmdsta); 548 rcmdsta, ccmdsta);
538} 549}
539 550
540static inline void pasemi_mac_tx_error(struct pasemi_mac *mac, u64 mactx) 551static inline void pasemi_mac_tx_error(const struct pasemi_mac *mac,
552 const u64 mactx)
541{ 553{
542 unsigned int cmdsta; 554 unsigned int cmdsta;
543 struct pasemi_dmachan *chan = &tx_ring(mac)->chan; 555 struct pasemi_dmachan *chan = &tx_ring(mac)->chan;
@@ -553,19 +565,22 @@ static inline void pasemi_mac_tx_error(struct pasemi_mac *mac, u64 mactx)
553 printk(KERN_ERR "pasemi_mac: tcmdsta 0x%08x\n", cmdsta); 565 printk(KERN_ERR "pasemi_mac: tcmdsta 0x%08x\n", cmdsta);
554} 566}
555 567
556static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx, int limit) 568static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx,
569 const int limit)
557{ 570{
558 struct pasemi_dmachan *chan = &rx->chan; 571 const struct pasemi_dmachan *chan = &rx->chan;
559 struct pasemi_mac *mac = rx->mac; 572 struct pasemi_mac *mac = rx->mac;
573 struct pci_dev *pdev = mac->dma_pdev;
560 unsigned int n; 574 unsigned int n;
561 int count; 575 int count, buf_index, tot_bytes, packets;
562 struct pasemi_mac_buffer *info; 576 struct pasemi_mac_buffer *info;
563 struct sk_buff *skb; 577 struct sk_buff *skb;
564 unsigned int len; 578 unsigned int len;
565 u64 macrx; 579 u64 macrx, eval;
566 dma_addr_t dma; 580 dma_addr_t dma;
567 int buf_index; 581
568 u64 eval; 582 tot_bytes = 0;
583 packets = 0;
569 584
570 spin_lock(&rx->lock); 585 spin_lock(&rx->lock);
571 586
@@ -575,6 +590,7 @@ static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx, int limit)
575 590
576 for (count = 0; count < limit; count++) { 591 for (count = 0; count < limit; count++) {
577 macrx = RX_DESC(rx, n); 592 macrx = RX_DESC(rx, n);
593 prefetch(&RX_DESC(rx, n+4));
578 594
579 if ((macrx & XCT_MACRX_E) || 595 if ((macrx & XCT_MACRX_E) ||
580 (*chan->status & PAS_STATUS_ERROR)) 596 (*chan->status & PAS_STATUS_ERROR))
@@ -596,12 +612,12 @@ static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx, int limit)
596 612
597 skb = info->skb; 613 skb = info->skb;
598 614
599 prefetch(skb); 615 prefetch_skb(skb);
600 prefetch(&skb->data_len);
601 616
602 len = (macrx & XCT_MACRX_LLEN_M) >> XCT_MACRX_LLEN_S; 617 len = (macrx & XCT_MACRX_LLEN_M) >> XCT_MACRX_LLEN_S;
603 618
604 pci_unmap_single(mac->dma_pdev, dma, len, PCI_DMA_FROMDEVICE); 619 pci_unmap_single(pdev, dma, BUF_SIZE-LOCAL_SKB_ALIGN,
620 PCI_DMA_FROMDEVICE);
605 621
606 if (macrx & XCT_MACRX_CRC) { 622 if (macrx & XCT_MACRX_CRC) {
607 /* CRC error flagged */ 623 /* CRC error flagged */
@@ -628,9 +644,6 @@ static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx, int limit)
628 644
629 info->dma = 0; 645 info->dma = 0;
630 646
631 /* Don't include CRC */
632 skb_put(skb, len-4);
633
634 if (likely((macrx & XCT_MACRX_HTY_M) == XCT_MACRX_HTY_IPV4_OK)) { 647 if (likely((macrx & XCT_MACRX_HTY_M) == XCT_MACRX_HTY_IPV4_OK)) {
635 skb->ip_summed = CHECKSUM_UNNECESSARY; 648 skb->ip_summed = CHECKSUM_UNNECESSARY;
636 skb->csum = (macrx & XCT_MACRX_CSUM_M) >> 649 skb->csum = (macrx & XCT_MACRX_CSUM_M) >>
@@ -638,8 +651,11 @@ static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx, int limit)
638 } else 651 } else
639 skb->ip_summed = CHECKSUM_NONE; 652 skb->ip_summed = CHECKSUM_NONE;
640 653
641 mac->netdev->stats.rx_bytes += len; 654 packets++;
642 mac->netdev->stats.rx_packets++; 655 tot_bytes += len;
656
657 /* Don't include CRC */
658 skb_put(skb, len-4);
643 659
644 skb->protocol = eth_type_trans(skb, mac->netdev); 660 skb->protocol = eth_type_trans(skb, mac->netdev);
645 netif_receive_skb(skb); 661 netif_receive_skb(skb);
@@ -672,6 +688,9 @@ next:
672 688
673 pasemi_mac_replenish_rx_ring(mac->netdev, count); 689 pasemi_mac_replenish_rx_ring(mac->netdev, count);
674 690
691 mac->netdev->stats.rx_bytes += tot_bytes;
692 mac->netdev->stats.rx_packets += packets;
693
675 spin_unlock(&rx_ring(mac)->lock); 694 spin_unlock(&rx_ring(mac)->lock);
676 695
677 return count; 696 return count;
@@ -758,10 +777,10 @@ restart:
758 777
759static irqreturn_t pasemi_mac_rx_intr(int irq, void *data) 778static irqreturn_t pasemi_mac_rx_intr(int irq, void *data)
760{ 779{
761 struct pasemi_mac_rxring *rxring = data; 780 const struct pasemi_mac_rxring *rxring = data;
762 struct pasemi_mac *mac = rxring->mac; 781 struct pasemi_mac *mac = rxring->mac;
763 struct net_device *dev = mac->netdev; 782 struct net_device *dev = mac->netdev;
764 struct pasemi_dmachan *chan = &rxring->chan; 783 const struct pasemi_dmachan *chan = &rxring->chan;
765 unsigned int reg; 784 unsigned int reg;
766 785
767 if (!(*chan->status & PAS_STATUS_CAUSE_M)) 786 if (!(*chan->status & PAS_STATUS_CAUSE_M))
@@ -789,7 +808,7 @@ static irqreturn_t pasemi_mac_rx_intr(int irq, void *data)
789static irqreturn_t pasemi_mac_tx_intr(int irq, void *data) 808static irqreturn_t pasemi_mac_tx_intr(int irq, void *data)
790{ 809{
791 struct pasemi_mac_txring *txring = data; 810 struct pasemi_mac_txring *txring = data;
792 struct pasemi_dmachan *chan = &txring->chan; 811 const struct pasemi_dmachan *chan = &txring->chan;
793 unsigned int reg, pcnt; 812 unsigned int reg, pcnt;
794 813
795 if (!(*chan->status & PAS_STATUS_CAUSE_M)) 814 if (!(*chan->status & PAS_STATUS_CAUSE_M))
@@ -1158,6 +1177,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
1158 unsigned int map_size[MAX_SKB_FRAGS+1]; 1177 unsigned int map_size[MAX_SKB_FRAGS+1];
1159 unsigned long flags; 1178 unsigned long flags;
1160 int i, nfrags; 1179 int i, nfrags;
1180 int fill;
1161 1181
1162 dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_CRC_PAD; 1182 dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_CRC_PAD;
1163 1183
@@ -1205,6 +1225,8 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
1205 1225
1206 spin_lock_irqsave(&txring->lock, flags); 1226 spin_lock_irqsave(&txring->lock, flags);
1207 1227
1228 fill = txring->next_to_fill;
1229
1208 /* Avoid stepping on the same cache line that the DMA controller 1230 /* Avoid stepping on the same cache line that the DMA controller
1209 * is currently about to send, so leave at least 8 words available. 1231 * is currently about to send, so leave at least 8 words available.
1210 * Total free space needed is mactx + fragments + 8 1232 * Total free space needed is mactx + fragments + 8
@@ -1215,13 +1237,13 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
1215 goto out_err; 1237 goto out_err;
1216 } 1238 }
1217 1239
1218 TX_DESC(txring, txring->next_to_fill) = mactx; 1240 TX_DESC(txring, fill) = mactx;
1219 txring->next_to_fill++; 1241 fill++;
1220 TX_DESC_INFO(txring, txring->next_to_fill).skb = skb; 1242 TX_DESC_INFO(txring, fill).skb = skb;
1221 for (i = 0; i <= nfrags; i++) { 1243 for (i = 0; i <= nfrags; i++) {
1222 TX_DESC(txring, txring->next_to_fill+i) = 1244 TX_DESC(txring, fill+i) =
1223 XCT_PTR_LEN(map_size[i]) | XCT_PTR_ADDR(map[i]); 1245 XCT_PTR_LEN(map_size[i]) | XCT_PTR_ADDR(map[i]);
1224 TX_DESC_INFO(txring, txring->next_to_fill+i).dma = map[i]; 1246 TX_DESC_INFO(txring, fill+i).dma = map[i];
1225 } 1247 }
1226 1248
1227 /* We have to add an even number of 8-byte entries to the ring 1249 /* We have to add an even number of 8-byte entries to the ring
@@ -1231,8 +1253,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
1231 if (nfrags & 1) 1253 if (nfrags & 1)
1232 nfrags++; 1254 nfrags++;
1233 1255
1234 txring->next_to_fill = (txring->next_to_fill + nfrags + 1) & 1256 txring->next_to_fill = (fill + nfrags + 1) & (TX_RING_SIZE-1);
1235 (TX_RING_SIZE-1);
1236 1257
1237 dev->stats.tx_packets++; 1258 dev->stats.tx_packets++;
1238 dev->stats.tx_bytes += skb->len; 1259 dev->stats.tx_bytes += skb->len;
@@ -1255,7 +1276,7 @@ out_err_nolock:
1255 1276
1256static void pasemi_mac_set_rx_mode(struct net_device *dev) 1277static void pasemi_mac_set_rx_mode(struct net_device *dev)
1257{ 1278{
1258 struct pasemi_mac *mac = netdev_priv(dev); 1279 const struct pasemi_mac *mac = netdev_priv(dev);
1259 unsigned int flags; 1280 unsigned int flags;
1260 1281
1261 flags = read_mac_reg(mac, PAS_MAC_CFG_PCFG); 1282 flags = read_mac_reg(mac, PAS_MAC_CFG_PCFG);
@@ -1317,7 +1338,8 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1317 1338
1318 netif_napi_add(dev, &mac->napi, pasemi_mac_poll, 64); 1339 netif_napi_add(dev, &mac->napi, pasemi_mac_poll, 64);
1319 1340
1320 dev->features = NETIF_F_IP_CSUM | NETIF_F_LLTX | NETIF_F_SG; 1341 dev->features = NETIF_F_IP_CSUM | NETIF_F_LLTX | NETIF_F_SG |
1342 NETIF_F_HIGHDMA;
1321 1343
1322 mac->dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL); 1344 mac->dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL);
1323 if (!mac->dma_pdev) { 1345 if (!mac->dma_pdev) {