about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jesse Brandeburg <jesse.brandeburg@intel.com> 2006-01-18 16:01:32 -0500
committer Jeff Garzik <jgarzik@pobox.com> 2006-01-18 16:17:57 -0500
commit b92ff8ee5763ee0b408f3cf2360f58dd7ea9c5da (patch)
tree e6e8a694181ea93265f786ddadfb7e66fb1c78aa
parent 35ec56bb78fda9c88cd1ad30e048ce5b4398d33f (diff)
[PATCH] e1000: Added RX buffer enhancements
Align the prefetches to a dword to help speed them up. Recycle skbs and replenish RX buffers early. Force memory writes to complete before fetching more descriptors.

Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: John Ronciak <john.ronciak@intel.com>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
-rw-r--r-- drivers/net/e1000/e1000_main.c 137
1 files changed, 78 insertions, 59 deletions
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 73b2a7be2126..53a9cca06905 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -1653,23 +1653,8 @@ e1000_setup_rctl(struct e1000_adapter *adapter)
1653 rctl |= adapter->rx_buffer_len << 0x11; 1653 rctl |= adapter->rx_buffer_len << 0x11;
1654 } else { 1654 } else {
1655 rctl &= ~E1000_RCTL_SZ_4096; 1655 rctl &= ~E1000_RCTL_SZ_4096;
1656 rctl |= E1000_RCTL_BSEX; 1656 rctl &= ~E1000_RCTL_BSEX;
1657 switch (adapter->rx_buffer_len) { 1657 rctl |= E1000_RCTL_SZ_2048;
1658 case E1000_RXBUFFER_2048:
1659 default:
1660 rctl |= E1000_RCTL_SZ_2048;
1661 rctl &= ~E1000_RCTL_BSEX;
1662 break;
1663 case E1000_RXBUFFER_4096:
1664 rctl |= E1000_RCTL_SZ_4096;
1665 break;
1666 case E1000_RXBUFFER_8192:
1667 rctl |= E1000_RCTL_SZ_8192;
1668 break;
1669 case E1000_RXBUFFER_16384:
1670 rctl |= E1000_RCTL_SZ_16384;
1671 break;
1672 }
1673 } 1658 }
1674 1659
1675#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT 1660#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT
@@ -3571,7 +3556,6 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter,
3571 struct pci_dev *pdev = adapter->pdev; 3556 struct pci_dev *pdev = adapter->pdev;
3572 struct e1000_rx_desc *rx_desc; 3557 struct e1000_rx_desc *rx_desc;
3573 struct e1000_buffer *buffer_info; 3558 struct e1000_buffer *buffer_info;
3574 struct sk_buff *skb;
3575 unsigned long flags; 3559 unsigned long flags;
3576 uint32_t length; 3560 uint32_t length;
3577 uint8_t last_byte; 3561 uint8_t last_byte;
@@ -3581,9 +3565,10 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter,
3581 3565
3582 i = rx_ring->next_to_clean; 3566 i = rx_ring->next_to_clean;
3583 rx_desc = E1000_RX_DESC(*rx_ring, i); 3567 rx_desc = E1000_RX_DESC(*rx_ring, i);
3568 buffer_info = &rx_ring->buffer_info[i];
3584 3569
3585 while(rx_desc->status & E1000_RXD_STAT_DD) { 3570 while (rx_desc->status & E1000_RXD_STAT_DD) {
3586 buffer_info = &rx_ring->buffer_info[i]; 3571 struct sk_buff *skb;
3587 u8 status; 3572 u8 status;
3588#ifdef CONFIG_E1000_NAPI 3573#ifdef CONFIG_E1000_NAPI
3589 if(*work_done >= work_to_do) 3574 if(*work_done >= work_to_do)
@@ -3591,6 +3576,7 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter,
3591 (*work_done)++; 3576 (*work_done)++;
3592#endif 3577#endif
3593 status = rx_desc->status; 3578 status = rx_desc->status;
3579 skb = buffer_info->skb;
3594 cleaned = TRUE; 3580 cleaned = TRUE;
3595 cleaned_count++; 3581 cleaned_count++;
3596 pci_unmap_single(pdev, 3582 pci_unmap_single(pdev,
@@ -3598,20 +3584,50 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter,
3598 buffer_info->length, 3584 buffer_info->length,
3599 PCI_DMA_FROMDEVICE); 3585 PCI_DMA_FROMDEVICE);
3600 3586
3601 skb = buffer_info->skb;
3602 length = le16_to_cpu(rx_desc->length); 3587 length = le16_to_cpu(rx_desc->length);
3603 3588
3604 if(unlikely(!(rx_desc->status & E1000_RXD_STAT_EOP))) { 3589 skb_put(skb, length);
3605 /* All receives must fit into a single buffer */ 3590
3606 E1000_DBG("%s: Receive packet consumed multiple" 3591 if (!(status & E1000_RXD_STAT_EOP)) {
3607 " buffers\n", netdev->name); 3592 if (!rx_ring->rx_skb_top) {
3608 dev_kfree_skb_irq(skb); 3593 rx_ring->rx_skb_top = skb;
3594 rx_ring->rx_skb_top->len = length;
3595 rx_ring->rx_skb_prev = skb;
3596 } else {
3597 if (skb_shinfo(rx_ring->rx_skb_top)->frag_list) {
3598 rx_ring->rx_skb_prev->next = skb;
3599 skb->prev = rx_ring->rx_skb_prev;
3600 } else {
3601 skb_shinfo(rx_ring->rx_skb_top)->frag_list = skb;
3602 }
3603 rx_ring->rx_skb_prev = skb;
3604 rx_ring->rx_skb_top->data_len += length;
3605 }
3609 goto next_desc; 3606 goto next_desc;
3607 } else {
3608 if (rx_ring->rx_skb_top) {
3609 if (skb_shinfo(rx_ring->rx_skb_top)
3610 ->frag_list) {
3611 rx_ring->rx_skb_prev->next = skb;
3612 skb->prev = rx_ring->rx_skb_prev;
3613 } else
3614 skb_shinfo(rx_ring->rx_skb_top)
3615 ->frag_list = skb;
3616
3617 rx_ring->rx_skb_top->data_len += length;
3618 rx_ring->rx_skb_top->len +=
3619 rx_ring->rx_skb_top->data_len;
3620
3621 skb = rx_ring->rx_skb_top;
3622 multi_descriptor = TRUE;
3623 rx_ring->rx_skb_top = NULL;
3624 rx_ring->rx_skb_prev = NULL;
3625 }
3610 } 3626 }
3611 3627
3612 if(unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { 3628 if(unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) {
3613 last_byte = *(skb->data + length - 1); 3629 last_byte = *(skb->data + length - 1);
3614 if(TBI_ACCEPT(&adapter->hw, rx_desc->status, 3630 if (TBI_ACCEPT(&adapter->hw, status,
3615 rx_desc->errors, length, last_byte)) { 3631 rx_desc->errors, length, last_byte)) {
3616 spin_lock_irqsave(&adapter->stats_lock, flags); 3632 spin_lock_irqsave(&adapter->stats_lock, flags);
3617 e1000_tbi_adjust_stats(&adapter->hw, 3633 e1000_tbi_adjust_stats(&adapter->hw,
@@ -3668,7 +3684,7 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter,
3668 } 3684 }
3669#else /* CONFIG_E1000_NAPI */ 3685#else /* CONFIG_E1000_NAPI */
3670 if(unlikely(adapter->vlgrp && 3686 if(unlikely(adapter->vlgrp &&
3671 (rx_desc->status & E1000_RXD_STAT_VP))) { 3687 (status & E1000_RXD_STAT_VP))) {
3672 vlan_hwaccel_rx(skb, adapter->vlgrp, 3688 vlan_hwaccel_rx(skb, adapter->vlgrp,
3673 le16_to_cpu(rx_desc->special) & 3689 le16_to_cpu(rx_desc->special) &
3674 E1000_RXD_SPC_VLAN_MASK); 3690 E1000_RXD_SPC_VLAN_MASK);
@@ -3795,12 +3811,8 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter,
3795 skb->protocol = eth_type_trans(skb, netdev); 3811 skb->protocol = eth_type_trans(skb, netdev);
3796 3812
3797 if(likely(rx_desc->wb.upper.header_status & 3813 if(likely(rx_desc->wb.upper.header_status &
3798 E1000_RXDPS_HDRSTAT_HDRSP)) { 3814 E1000_RXDPS_HDRSTAT_HDRSP))
3799 adapter->rx_hdr_split++; 3815 adapter->rx_hdr_split++;
3800#ifdef HAVE_RX_ZERO_COPY
3801 skb_shinfo(skb)->zero_copy = TRUE;
3802#endif
3803 }
3804#ifdef CONFIG_E1000_NAPI 3816#ifdef CONFIG_E1000_NAPI
3805 if(unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) { 3817 if(unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) {
3806 vlan_hwaccel_receive_skb(skb, adapter->vlgrp, 3818 vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
@@ -3940,20 +3952,22 @@ map_skb:
3940 rx_desc = E1000_RX_DESC(*rx_ring, i); 3952 rx_desc = E1000_RX_DESC(*rx_ring, i);
3941 rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); 3953 rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
3942 3954
3943 if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) {
3944 /* Force memory writes to complete before letting h/w
3945 * know there are new descriptors to fetch. (Only
3946 * applicable for weak-ordered memory model archs,
3947 * such as IA-64). */
3948 wmb();
3949 writel(i, adapter->hw.hw_addr + rx_ring->rdt);
3950 }
3951
3952 if(unlikely(++i == rx_ring->count)) i = 0; 3955 if(unlikely(++i == rx_ring->count)) i = 0;
3953 buffer_info = &rx_ring->buffer_info[i]; 3956 buffer_info = &rx_ring->buffer_info[i];
3954 } 3957 }
3955 3958
3956 rx_ring->next_to_use = i; 3959 if (likely(rx_ring->next_to_use != i)) {
3960 rx_ring->next_to_use = i;
3961 if (unlikely(i-- == 0))
3962 i = (rx_ring->count - 1);
3963
3964 /* Force memory writes to complete before letting h/w
3965 * know there are new descriptors to fetch. (Only
3966 * applicable for weak-ordered memory model archs,
3967 * such as IA-64). */
3968 wmb();
3969 writel(i, adapter->hw.hw_addr + rx_ring->rdt);
3970 }
3957} 3971}
3958 3972
3959/** 3973/**
@@ -3988,8 +4002,10 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
3988 if (likely(!ps_page->ps_page[j])) { 4002 if (likely(!ps_page->ps_page[j])) {
3989 ps_page->ps_page[j] = 4003 ps_page->ps_page[j] =
3990 alloc_page(GFP_ATOMIC); 4004 alloc_page(GFP_ATOMIC);
3991 if (unlikely(!ps_page->ps_page[j])) 4005 if (unlikely(!ps_page->ps_page[j])) {
4006 adapter->alloc_rx_buff_failed++;
3992 goto no_buffers; 4007 goto no_buffers;
4008 }
3993 ps_page_dma->ps_page_dma[j] = 4009 ps_page_dma->ps_page_dma[j] =
3994 pci_map_page(pdev, 4010 pci_map_page(pdev,
3995 ps_page->ps_page[j], 4011 ps_page->ps_page[j],
@@ -4008,8 +4024,10 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
4008 4024
4009 skb = dev_alloc_skb(adapter->rx_ps_bsize0 + NET_IP_ALIGN); 4025 skb = dev_alloc_skb(adapter->rx_ps_bsize0 + NET_IP_ALIGN);
4010 4026
4011 if(unlikely(!skb)) 4027 if (unlikely(!skb)) {
4028 adapter->alloc_rx_buff_failed++;
4012 break; 4029 break;
4030 }
4013 4031
4014 /* Make buffer alignment 2 beyond a 16 byte boundary 4032 /* Make buffer alignment 2 beyond a 16 byte boundary
4015 * this will result in a 16 byte aligned IP header after 4033 * this will result in a 16 byte aligned IP header after
@@ -4027,19 +4045,6 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
4027 4045
4028 rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma); 4046 rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma);
4029 4047
4030 if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) {
4031 /* Force memory writes to complete before letting h/w
4032 * know there are new descriptors to fetch. (Only
4033 * applicable for weak-ordered memory model archs,
4034 * such as IA-64). */
4035 wmb();
4036 /* Hardware increments by 16 bytes, but packet split
4037 * descriptors are 32 bytes...so we increment tail
4038 * twice as much.
4039 */
4040 writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt);
4041 }
4042
4043 if(unlikely(++i == rx_ring->count)) i = 0; 4048 if(unlikely(++i == rx_ring->count)) i = 0;
4044 buffer_info = &rx_ring->buffer_info[i]; 4049 buffer_info = &rx_ring->buffer_info[i];
4045 ps_page = &rx_ring->ps_page[i]; 4050 ps_page = &rx_ring->ps_page[i];
@@ -4047,7 +4052,21 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
4047 } 4052 }
4048 4053
4049no_buffers: 4054no_buffers:
4050 rx_ring->next_to_use = i; 4055 if (likely(rx_ring->next_to_use != i)) {
4056 rx_ring->next_to_use = i;
4057 if (unlikely(i-- == 0)) i = (rx_ring->count - 1);
4058
4059 /* Force memory writes to complete before letting h/w
4060 * know there are new descriptors to fetch. (Only
4061 * applicable for weak-ordered memory model archs,
4062 * such as IA-64). */
4063 wmb();
4064 /* Hardware increments by 16 bytes, but packet split
4065 * descriptors are 32 bytes...so we increment tail
4066 * twice as much.
4067 */
4068 writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt);
4069 }
4051} 4070}
4052 4071
4053/** 4072/**