diff options
author | Olof Johansson <olof@lixom.net> | 2007-11-28 21:57:45 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-01-28 18:04:25 -0500 |
commit | 7e9916e9ddf23cd08107ed1a7fac429eea619313 (patch) | |
tree | 3e9627ca90727819236581b23736409c5498be32 | |
parent | 28ae79f531014bb3ad95b6efa0e0603069087bc5 (diff) |
pasemi_mac: SKB unmap optimization
pasemi_mac: SKB unmap optimization
Avoid touching skb_shinfo() in the unmap path, since it turns out to
normally cause cache misses and delays. Instead, save the number of fragments
in the TX_RING_INFO structures, since that's all that's needed anyway.
Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
-rw-r--r-- | drivers/net/pasemi_mac.c | 39 |
1 files changed, 25 insertions, 14 deletions
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index 98b639742680..ca773580cc51 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c | |||
@@ -253,11 +253,11 @@ static int get_skb_hdr(struct sk_buff *skb, void **iphdr, | |||
253 | } | 253 | } |
254 | 254 | ||
255 | static int pasemi_mac_unmap_tx_skb(struct pasemi_mac *mac, | 255 | static int pasemi_mac_unmap_tx_skb(struct pasemi_mac *mac, |
256 | const int nfrags, | ||
256 | struct sk_buff *skb, | 257 | struct sk_buff *skb, |
257 | const dma_addr_t *dmas) | 258 | const dma_addr_t *dmas) |
258 | { | 259 | { |
259 | int f; | 260 | int f; |
260 | int nfrags = skb_shinfo(skb)->nr_frags; | ||
261 | struct pci_dev *pdev = mac->dma_pdev; | 261 | struct pci_dev *pdev = mac->dma_pdev; |
262 | 262 | ||
263 | pci_unmap_single(pdev, dmas[0], skb_headlen(skb), PCI_DMA_TODEVICE); | 263 | pci_unmap_single(pdev, dmas[0], skb_headlen(skb), PCI_DMA_TODEVICE); |
@@ -425,7 +425,7 @@ static void pasemi_mac_free_tx_resources(struct pasemi_mac *mac) | |||
425 | unsigned int i, j; | 425 | unsigned int i, j; |
426 | struct pasemi_mac_buffer *info; | 426 | struct pasemi_mac_buffer *info; |
427 | dma_addr_t dmas[MAX_SKB_FRAGS+1]; | 427 | dma_addr_t dmas[MAX_SKB_FRAGS+1]; |
428 | int freed; | 428 | int freed, nfrags; |
429 | int start, limit; | 429 | int start, limit; |
430 | 430 | ||
431 | start = txring->next_to_clean; | 431 | start = txring->next_to_clean; |
@@ -438,10 +438,12 @@ static void pasemi_mac_free_tx_resources(struct pasemi_mac *mac) | |||
438 | for (i = start; i < limit; i += freed) { | 438 | for (i = start; i < limit; i += freed) { |
439 | info = &txring->ring_info[(i+1) & (TX_RING_SIZE-1)]; | 439 | info = &txring->ring_info[(i+1) & (TX_RING_SIZE-1)]; |
440 | if (info->dma && info->skb) { | 440 | if (info->dma && info->skb) { |
441 | for (j = 0; j <= skb_shinfo(info->skb)->nr_frags; j++) | 441 | nfrags = skb_shinfo(info->skb)->nr_frags; |
442 | for (j = 0; j <= nfrags; j++) | ||
442 | dmas[j] = txring->ring_info[(i+1+j) & | 443 | dmas[j] = txring->ring_info[(i+1+j) & |
443 | (TX_RING_SIZE-1)].dma; | 444 | (TX_RING_SIZE-1)].dma; |
444 | freed = pasemi_mac_unmap_tx_skb(mac, info->skb, dmas); | 445 | freed = pasemi_mac_unmap_tx_skb(mac, nfrags, |
446 | info->skb, dmas); | ||
445 | } else | 447 | } else |
446 | freed = 2; | 448 | freed = 2; |
447 | } | 449 | } |
@@ -749,6 +751,8 @@ static int pasemi_mac_clean_tx(struct pasemi_mac_txring *txring) | |||
749 | unsigned long flags; | 751 | unsigned long flags; |
750 | struct sk_buff *skbs[TX_CLEAN_BATCHSIZE]; | 752 | struct sk_buff *skbs[TX_CLEAN_BATCHSIZE]; |
751 | dma_addr_t dmas[TX_CLEAN_BATCHSIZE][MAX_SKB_FRAGS+1]; | 753 | dma_addr_t dmas[TX_CLEAN_BATCHSIZE][MAX_SKB_FRAGS+1]; |
754 | int nf[TX_CLEAN_BATCHSIZE]; | ||
755 | int nr_frags; | ||
752 | 756 | ||
753 | total_count = 0; | 757 | total_count = 0; |
754 | batch_limit = TX_CLEAN_BATCHSIZE; | 758 | batch_limit = TX_CLEAN_BATCHSIZE; |
@@ -758,6 +762,8 @@ restart: | |||
758 | start = txring->next_to_clean; | 762 | start = txring->next_to_clean; |
759 | ring_limit = txring->next_to_fill; | 763 | ring_limit = txring->next_to_fill; |
760 | 764 | ||
765 | prefetch(&TX_DESC_INFO(txring, start+1).skb); | ||
766 | |||
761 | /* Compensate for when fill has wrapped but clean has not */ | 767 | /* Compensate for when fill has wrapped but clean has not */ |
762 | if (start > ring_limit) | 768 | if (start > ring_limit) |
763 | ring_limit += TX_RING_SIZE; | 769 | ring_limit += TX_RING_SIZE; |
@@ -771,6 +777,9 @@ restart: | |||
771 | u64 mactx = TX_DESC(txring, i); | 777 | u64 mactx = TX_DESC(txring, i); |
772 | struct sk_buff *skb; | 778 | struct sk_buff *skb; |
773 | 779 | ||
780 | skb = TX_DESC_INFO(txring, i+1).skb; | ||
781 | nr_frags = TX_DESC_INFO(txring, i).dma; | ||
782 | |||
774 | if ((mactx & XCT_MACTX_E) || | 783 | if ((mactx & XCT_MACTX_E) || |
775 | (*chan->status & PAS_STATUS_ERROR)) | 784 | (*chan->status & PAS_STATUS_ERROR)) |
776 | pasemi_mac_tx_error(mac, mactx); | 785 | pasemi_mac_tx_error(mac, mactx); |
@@ -779,21 +788,22 @@ restart: | |||
779 | /* Not yet transmitted */ | 788 | /* Not yet transmitted */ |
780 | break; | 789 | break; |
781 | 790 | ||
782 | skb = TX_DESC_INFO(txring, i+1).skb; | 791 | buf_count = 2 + nr_frags; |
783 | skbs[descr_count] = skb; | 792 | /* Since we always fill with an even number of entries, make |
793 | * sure we skip any unused one at the end as well. | ||
794 | */ | ||
795 | if (buf_count & 1) | ||
796 | buf_count++; | ||
784 | 797 | ||
785 | buf_count = 2 + skb_shinfo(skb)->nr_frags; | 798 | for (j = 0; j <= nr_frags; j++) |
786 | for (j = 0; j <= skb_shinfo(skb)->nr_frags; j++) | ||
787 | dmas[descr_count][j] = TX_DESC_INFO(txring, i+1+j).dma; | 799 | dmas[descr_count][j] = TX_DESC_INFO(txring, i+1+j).dma; |
788 | 800 | ||
801 | skbs[descr_count] = skb; | ||
802 | nf[descr_count] = nr_frags; | ||
803 | |||
789 | TX_DESC(txring, i) = 0; | 804 | TX_DESC(txring, i) = 0; |
790 | TX_DESC(txring, i+1) = 0; | 805 | TX_DESC(txring, i+1) = 0; |
791 | 806 | ||
792 | /* Since we always fill with an even number of entries, make | ||
793 | * sure we skip any unused one at the end as well. | ||
794 | */ | ||
795 | if (buf_count & 1) | ||
796 | buf_count++; | ||
797 | descr_count++; | 807 | descr_count++; |
798 | } | 808 | } |
799 | txring->next_to_clean = i & (TX_RING_SIZE-1); | 809 | txring->next_to_clean = i & (TX_RING_SIZE-1); |
@@ -802,7 +812,7 @@ restart: | |||
802 | netif_wake_queue(mac->netdev); | 812 | netif_wake_queue(mac->netdev); |
803 | 813 | ||
804 | for (i = 0; i < descr_count; i++) | 814 | for (i = 0; i < descr_count; i++) |
805 | pasemi_mac_unmap_tx_skb(mac, skbs[i], dmas[i]); | 815 | pasemi_mac_unmap_tx_skb(mac, nf[i], skbs[i], dmas[i]); |
806 | 816 | ||
807 | total_count += descr_count; | 817 | total_count += descr_count; |
808 | 818 | ||
@@ -1299,6 +1309,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev) | |||
1299 | } | 1309 | } |
1300 | 1310 | ||
1301 | TX_DESC(txring, fill) = mactx; | 1311 | TX_DESC(txring, fill) = mactx; |
1312 | TX_DESC_INFO(txring, fill).dma = nfrags; | ||
1302 | fill++; | 1313 | fill++; |
1303 | TX_DESC_INFO(txring, fill).skb = skb; | 1314 | TX_DESC_INFO(txring, fill).skb = skb; |
1304 | for (i = 0; i <= nfrags; i++) { | 1315 | for (i = 0; i <= nfrags; i++) { |