diff options
| author | Jesse Brandeburg <jesse.brandeburg@intel.com> | 2006-01-18 16:01:32 -0500 |
|---|---|---|
| committer | Jeff Garzik <jgarzik@pobox.com> | 2006-01-18 16:17:57 -0500 |
| commit | b92ff8ee5763ee0b408f3cf2360f58dd7ea9c5da (patch) | |
| tree | e6e8a694181ea93265f786ddadfb7e66fb1c78aa /drivers/net | |
| parent | 35ec56bb78fda9c88cd1ad30e048ce5b4398d33f (diff) | |
[PATCH] e1000: Added RX buffer enhancements
Align the prefetches to a dword to help speed them up.
Recycle skbs and replenish RX buffers early.
Force memory writes to complete before fetching more descriptors.
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: John Ronciak <john.ronciak@intel.com>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
Diffstat (limited to 'drivers/net')
| -rw-r--r-- | drivers/net/e1000/e1000_main.c | 137 |
1 file changed, 78 insertions(+), 59 deletions(-)
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 73b2a7be2126..53a9cca06905 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c | |||
| @@ -1653,23 +1653,8 @@ e1000_setup_rctl(struct e1000_adapter *adapter) | |||
| 1653 | rctl |= adapter->rx_buffer_len << 0x11; | 1653 | rctl |= adapter->rx_buffer_len << 0x11; |
| 1654 | } else { | 1654 | } else { |
| 1655 | rctl &= ~E1000_RCTL_SZ_4096; | 1655 | rctl &= ~E1000_RCTL_SZ_4096; |
| 1656 | rctl |= E1000_RCTL_BSEX; | 1656 | rctl &= ~E1000_RCTL_BSEX; |
| 1657 | switch (adapter->rx_buffer_len) { | 1657 | rctl |= E1000_RCTL_SZ_2048; |
| 1658 | case E1000_RXBUFFER_2048: | ||
| 1659 | default: | ||
| 1660 | rctl |= E1000_RCTL_SZ_2048; | ||
| 1661 | rctl &= ~E1000_RCTL_BSEX; | ||
| 1662 | break; | ||
| 1663 | case E1000_RXBUFFER_4096: | ||
| 1664 | rctl |= E1000_RCTL_SZ_4096; | ||
| 1665 | break; | ||
| 1666 | case E1000_RXBUFFER_8192: | ||
| 1667 | rctl |= E1000_RCTL_SZ_8192; | ||
| 1668 | break; | ||
| 1669 | case E1000_RXBUFFER_16384: | ||
| 1670 | rctl |= E1000_RCTL_SZ_16384; | ||
| 1671 | break; | ||
| 1672 | } | ||
| 1673 | } | 1658 | } |
| 1674 | 1659 | ||
| 1675 | #ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT | 1660 | #ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT |
| @@ -3571,7 +3556,6 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, | |||
| 3571 | struct pci_dev *pdev = adapter->pdev; | 3556 | struct pci_dev *pdev = adapter->pdev; |
| 3572 | struct e1000_rx_desc *rx_desc; | 3557 | struct e1000_rx_desc *rx_desc; |
| 3573 | struct e1000_buffer *buffer_info; | 3558 | struct e1000_buffer *buffer_info; |
| 3574 | struct sk_buff *skb; | ||
| 3575 | unsigned long flags; | 3559 | unsigned long flags; |
| 3576 | uint32_t length; | 3560 | uint32_t length; |
| 3577 | uint8_t last_byte; | 3561 | uint8_t last_byte; |
| @@ -3581,9 +3565,10 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, | |||
| 3581 | 3565 | ||
| 3582 | i = rx_ring->next_to_clean; | 3566 | i = rx_ring->next_to_clean; |
| 3583 | rx_desc = E1000_RX_DESC(*rx_ring, i); | 3567 | rx_desc = E1000_RX_DESC(*rx_ring, i); |
| 3568 | buffer_info = &rx_ring->buffer_info[i]; | ||
| 3584 | 3569 | ||
| 3585 | while(rx_desc->status & E1000_RXD_STAT_DD) { | 3570 | while (rx_desc->status & E1000_RXD_STAT_DD) { |
| 3586 | buffer_info = &rx_ring->buffer_info[i]; | 3571 | struct sk_buff *skb; |
| 3587 | u8 status; | 3572 | u8 status; |
| 3588 | #ifdef CONFIG_E1000_NAPI | 3573 | #ifdef CONFIG_E1000_NAPI |
| 3589 | if(*work_done >= work_to_do) | 3574 | if(*work_done >= work_to_do) |
| @@ -3591,6 +3576,7 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, | |||
| 3591 | (*work_done)++; | 3576 | (*work_done)++; |
| 3592 | #endif | 3577 | #endif |
| 3593 | status = rx_desc->status; | 3578 | status = rx_desc->status; |
| 3579 | skb = buffer_info->skb; | ||
| 3594 | cleaned = TRUE; | 3580 | cleaned = TRUE; |
| 3595 | cleaned_count++; | 3581 | cleaned_count++; |
| 3596 | pci_unmap_single(pdev, | 3582 | pci_unmap_single(pdev, |
| @@ -3598,20 +3584,50 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, | |||
| 3598 | buffer_info->length, | 3584 | buffer_info->length, |
| 3599 | PCI_DMA_FROMDEVICE); | 3585 | PCI_DMA_FROMDEVICE); |
| 3600 | 3586 | ||
| 3601 | skb = buffer_info->skb; | ||
| 3602 | length = le16_to_cpu(rx_desc->length); | 3587 | length = le16_to_cpu(rx_desc->length); |
| 3603 | 3588 | ||
| 3604 | if(unlikely(!(rx_desc->status & E1000_RXD_STAT_EOP))) { | 3589 | skb_put(skb, length); |
| 3605 | /* All receives must fit into a single buffer */ | 3590 | |
| 3606 | E1000_DBG("%s: Receive packet consumed multiple" | 3591 | if (!(status & E1000_RXD_STAT_EOP)) { |
| 3607 | " buffers\n", netdev->name); | 3592 | if (!rx_ring->rx_skb_top) { |
| 3608 | dev_kfree_skb_irq(skb); | 3593 | rx_ring->rx_skb_top = skb; |
| 3594 | rx_ring->rx_skb_top->len = length; | ||
| 3595 | rx_ring->rx_skb_prev = skb; | ||
| 3596 | } else { | ||
| 3597 | if (skb_shinfo(rx_ring->rx_skb_top)->frag_list) { | ||
| 3598 | rx_ring->rx_skb_prev->next = skb; | ||
| 3599 | skb->prev = rx_ring->rx_skb_prev; | ||
| 3600 | } else { | ||
| 3601 | skb_shinfo(rx_ring->rx_skb_top)->frag_list = skb; | ||
| 3602 | } | ||
| 3603 | rx_ring->rx_skb_prev = skb; | ||
| 3604 | rx_ring->rx_skb_top->data_len += length; | ||
| 3605 | } | ||
| 3609 | goto next_desc; | 3606 | goto next_desc; |
| 3607 | } else { | ||
| 3608 | if (rx_ring->rx_skb_top) { | ||
| 3609 | if (skb_shinfo(rx_ring->rx_skb_top) | ||
| 3610 | ->frag_list) { | ||
| 3611 | rx_ring->rx_skb_prev->next = skb; | ||
| 3612 | skb->prev = rx_ring->rx_skb_prev; | ||
| 3613 | } else | ||
| 3614 | skb_shinfo(rx_ring->rx_skb_top) | ||
| 3615 | ->frag_list = skb; | ||
| 3616 | |||
| 3617 | rx_ring->rx_skb_top->data_len += length; | ||
| 3618 | rx_ring->rx_skb_top->len += | ||
| 3619 | rx_ring->rx_skb_top->data_len; | ||
| 3620 | |||
| 3621 | skb = rx_ring->rx_skb_top; | ||
| 3622 | multi_descriptor = TRUE; | ||
| 3623 | rx_ring->rx_skb_top = NULL; | ||
| 3624 | rx_ring->rx_skb_prev = NULL; | ||
| 3625 | } | ||
| 3610 | } | 3626 | } |
| 3611 | 3627 | ||
| 3612 | if(unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { | 3628 | if(unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { |
| 3613 | last_byte = *(skb->data + length - 1); | 3629 | last_byte = *(skb->data + length - 1); |
| 3614 | if(TBI_ACCEPT(&adapter->hw, rx_desc->status, | 3630 | if (TBI_ACCEPT(&adapter->hw, status, |
| 3615 | rx_desc->errors, length, last_byte)) { | 3631 | rx_desc->errors, length, last_byte)) { |
| 3616 | spin_lock_irqsave(&adapter->stats_lock, flags); | 3632 | spin_lock_irqsave(&adapter->stats_lock, flags); |
| 3617 | e1000_tbi_adjust_stats(&adapter->hw, | 3633 | e1000_tbi_adjust_stats(&adapter->hw, |
| @@ -3668,7 +3684,7 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, | |||
| 3668 | } | 3684 | } |
| 3669 | #else /* CONFIG_E1000_NAPI */ | 3685 | #else /* CONFIG_E1000_NAPI */ |
| 3670 | if(unlikely(adapter->vlgrp && | 3686 | if(unlikely(adapter->vlgrp && |
| 3671 | (rx_desc->status & E1000_RXD_STAT_VP))) { | 3687 | (status & E1000_RXD_STAT_VP))) { |
| 3672 | vlan_hwaccel_rx(skb, adapter->vlgrp, | 3688 | vlan_hwaccel_rx(skb, adapter->vlgrp, |
| 3673 | le16_to_cpu(rx_desc->special) & | 3689 | le16_to_cpu(rx_desc->special) & |
| 3674 | E1000_RXD_SPC_VLAN_MASK); | 3690 | E1000_RXD_SPC_VLAN_MASK); |
| @@ -3795,12 +3811,8 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, | |||
| 3795 | skb->protocol = eth_type_trans(skb, netdev); | 3811 | skb->protocol = eth_type_trans(skb, netdev); |
| 3796 | 3812 | ||
| 3797 | if(likely(rx_desc->wb.upper.header_status & | 3813 | if(likely(rx_desc->wb.upper.header_status & |
| 3798 | E1000_RXDPS_HDRSTAT_HDRSP)) { | 3814 | E1000_RXDPS_HDRSTAT_HDRSP)) |
| 3799 | adapter->rx_hdr_split++; | 3815 | adapter->rx_hdr_split++; |
| 3800 | #ifdef HAVE_RX_ZERO_COPY | ||
| 3801 | skb_shinfo(skb)->zero_copy = TRUE; | ||
| 3802 | #endif | ||
| 3803 | } | ||
| 3804 | #ifdef CONFIG_E1000_NAPI | 3816 | #ifdef CONFIG_E1000_NAPI |
| 3805 | if(unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) { | 3817 | if(unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) { |
| 3806 | vlan_hwaccel_receive_skb(skb, adapter->vlgrp, | 3818 | vlan_hwaccel_receive_skb(skb, adapter->vlgrp, |
| @@ -3940,20 +3952,22 @@ map_skb: | |||
| 3940 | rx_desc = E1000_RX_DESC(*rx_ring, i); | 3952 | rx_desc = E1000_RX_DESC(*rx_ring, i); |
| 3941 | rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); | 3953 | rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); |
| 3942 | 3954 | ||
| 3943 | if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) { | ||
| 3944 | /* Force memory writes to complete before letting h/w | ||
| 3945 | * know there are new descriptors to fetch. (Only | ||
| 3946 | * applicable for weak-ordered memory model archs, | ||
| 3947 | * such as IA-64). */ | ||
| 3948 | wmb(); | ||
| 3949 | writel(i, adapter->hw.hw_addr + rx_ring->rdt); | ||
| 3950 | } | ||
| 3951 | |||
| 3952 | if(unlikely(++i == rx_ring->count)) i = 0; | 3955 | if(unlikely(++i == rx_ring->count)) i = 0; |
| 3953 | buffer_info = &rx_ring->buffer_info[i]; | 3956 | buffer_info = &rx_ring->buffer_info[i]; |
| 3954 | } | 3957 | } |
| 3955 | 3958 | ||
| 3956 | rx_ring->next_to_use = i; | 3959 | if (likely(rx_ring->next_to_use != i)) { |
| 3960 | rx_ring->next_to_use = i; | ||
| 3961 | if (unlikely(i-- == 0)) | ||
| 3962 | i = (rx_ring->count - 1); | ||
| 3963 | |||
| 3964 | /* Force memory writes to complete before letting h/w | ||
| 3965 | * know there are new descriptors to fetch. (Only | ||
| 3966 | * applicable for weak-ordered memory model archs, | ||
| 3967 | * such as IA-64). */ | ||
| 3968 | wmb(); | ||
| 3969 | writel(i, adapter->hw.hw_addr + rx_ring->rdt); | ||
| 3970 | } | ||
| 3957 | } | 3971 | } |
| 3958 | 3972 | ||
| 3959 | /** | 3973 | /** |
| @@ -3988,8 +4002,10 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, | |||
| 3988 | if (likely(!ps_page->ps_page[j])) { | 4002 | if (likely(!ps_page->ps_page[j])) { |
| 3989 | ps_page->ps_page[j] = | 4003 | ps_page->ps_page[j] = |
| 3990 | alloc_page(GFP_ATOMIC); | 4004 | alloc_page(GFP_ATOMIC); |
| 3991 | if (unlikely(!ps_page->ps_page[j])) | 4005 | if (unlikely(!ps_page->ps_page[j])) { |
| 4006 | adapter->alloc_rx_buff_failed++; | ||
| 3992 | goto no_buffers; | 4007 | goto no_buffers; |
| 4008 | } | ||
| 3993 | ps_page_dma->ps_page_dma[j] = | 4009 | ps_page_dma->ps_page_dma[j] = |
| 3994 | pci_map_page(pdev, | 4010 | pci_map_page(pdev, |
| 3995 | ps_page->ps_page[j], | 4011 | ps_page->ps_page[j], |
| @@ -4008,8 +4024,10 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, | |||
| 4008 | 4024 | ||
| 4009 | skb = dev_alloc_skb(adapter->rx_ps_bsize0 + NET_IP_ALIGN); | 4025 | skb = dev_alloc_skb(adapter->rx_ps_bsize0 + NET_IP_ALIGN); |
| 4010 | 4026 | ||
| 4011 | if(unlikely(!skb)) | 4027 | if (unlikely(!skb)) { |
| 4028 | adapter->alloc_rx_buff_failed++; | ||
| 4012 | break; | 4029 | break; |
| 4030 | } | ||
| 4013 | 4031 | ||
| 4014 | /* Make buffer alignment 2 beyond a 16 byte boundary | 4032 | /* Make buffer alignment 2 beyond a 16 byte boundary |
| 4015 | * this will result in a 16 byte aligned IP header after | 4033 | * this will result in a 16 byte aligned IP header after |
| @@ -4027,19 +4045,6 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, | |||
| 4027 | 4045 | ||
| 4028 | rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma); | 4046 | rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma); |
| 4029 | 4047 | ||
| 4030 | if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) { | ||
| 4031 | /* Force memory writes to complete before letting h/w | ||
| 4032 | * know there are new descriptors to fetch. (Only | ||
| 4033 | * applicable for weak-ordered memory model archs, | ||
| 4034 | * such as IA-64). */ | ||
| 4035 | wmb(); | ||
| 4036 | /* Hardware increments by 16 bytes, but packet split | ||
| 4037 | * descriptors are 32 bytes...so we increment tail | ||
| 4038 | * twice as much. | ||
| 4039 | */ | ||
| 4040 | writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt); | ||
| 4041 | } | ||
| 4042 | |||
| 4043 | if(unlikely(++i == rx_ring->count)) i = 0; | 4048 | if(unlikely(++i == rx_ring->count)) i = 0; |
| 4044 | buffer_info = &rx_ring->buffer_info[i]; | 4049 | buffer_info = &rx_ring->buffer_info[i]; |
| 4045 | ps_page = &rx_ring->ps_page[i]; | 4050 | ps_page = &rx_ring->ps_page[i]; |
| @@ -4047,7 +4052,21 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, | |||
| 4047 | } | 4052 | } |
| 4048 | 4053 | ||
| 4049 | no_buffers: | 4054 | no_buffers: |
| 4050 | rx_ring->next_to_use = i; | 4055 | if (likely(rx_ring->next_to_use != i)) { |
| 4056 | rx_ring->next_to_use = i; | ||
| 4057 | if (unlikely(i-- == 0)) i = (rx_ring->count - 1); | ||
| 4058 | |||
| 4059 | /* Force memory writes to complete before letting h/w | ||
| 4060 | * know there are new descriptors to fetch. (Only | ||
| 4061 | * applicable for weak-ordered memory model archs, | ||
| 4062 | * such as IA-64). */ | ||
| 4063 | wmb(); | ||
| 4064 | /* Hardware increments by 16 bytes, but packet split | ||
| 4065 | * descriptors are 32 bytes...so we increment tail | ||
| 4066 | * twice as much. | ||
| 4067 | */ | ||
| 4068 | writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt); | ||
| 4069 | } | ||
| 4051 | } | 4070 | } |
| 4052 | 4071 | ||
| 4053 | /** | 4072 | /** |
