diff options
author | Jesse Brandeburg <jesse.brandeburg@intel.com> | 2006-01-18 16:01:32 -0500 |
---|---|---|
committer | Jeff Garzik <jgarzik@pobox.com> | 2006-01-18 16:17:57 -0500 |
commit | b92ff8ee5763ee0b408f3cf2360f58dd7ea9c5da (patch) | |
tree | e6e8a694181ea93265f786ddadfb7e66fb1c78aa | |
parent | 35ec56bb78fda9c88cd1ad30e048ce5b4398d33f (diff) |
[PATCH] e1000: Added RX buffer enhancements
Align the prefetches to a dword to help speed them up.
Recycle skbs and replenish RX buffers early.
Force memory writes to complete before letting the hardware fetch more descriptors.
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: John Ronciak <john.ronciak@intel.com>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
-rw-r--r-- | drivers/net/e1000/e1000_main.c | 137 |
1 files changed, 78 insertions, 59 deletions
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 73b2a7be2126..53a9cca06905 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c | |||
@@ -1653,23 +1653,8 @@ e1000_setup_rctl(struct e1000_adapter *adapter) | |||
1653 | rctl |= adapter->rx_buffer_len << 0x11; | 1653 | rctl |= adapter->rx_buffer_len << 0x11; |
1654 | } else { | 1654 | } else { |
1655 | rctl &= ~E1000_RCTL_SZ_4096; | 1655 | rctl &= ~E1000_RCTL_SZ_4096; |
1656 | rctl |= E1000_RCTL_BSEX; | 1656 | rctl &= ~E1000_RCTL_BSEX; |
1657 | switch (adapter->rx_buffer_len) { | 1657 | rctl |= E1000_RCTL_SZ_2048; |
1658 | case E1000_RXBUFFER_2048: | ||
1659 | default: | ||
1660 | rctl |= E1000_RCTL_SZ_2048; | ||
1661 | rctl &= ~E1000_RCTL_BSEX; | ||
1662 | break; | ||
1663 | case E1000_RXBUFFER_4096: | ||
1664 | rctl |= E1000_RCTL_SZ_4096; | ||
1665 | break; | ||
1666 | case E1000_RXBUFFER_8192: | ||
1667 | rctl |= E1000_RCTL_SZ_8192; | ||
1668 | break; | ||
1669 | case E1000_RXBUFFER_16384: | ||
1670 | rctl |= E1000_RCTL_SZ_16384; | ||
1671 | break; | ||
1672 | } | ||
1673 | } | 1658 | } |
1674 | 1659 | ||
1675 | #ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT | 1660 | #ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT |
@@ -3571,7 +3556,6 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, | |||
3571 | struct pci_dev *pdev = adapter->pdev; | 3556 | struct pci_dev *pdev = adapter->pdev; |
3572 | struct e1000_rx_desc *rx_desc; | 3557 | struct e1000_rx_desc *rx_desc; |
3573 | struct e1000_buffer *buffer_info; | 3558 | struct e1000_buffer *buffer_info; |
3574 | struct sk_buff *skb; | ||
3575 | unsigned long flags; | 3559 | unsigned long flags; |
3576 | uint32_t length; | 3560 | uint32_t length; |
3577 | uint8_t last_byte; | 3561 | uint8_t last_byte; |
@@ -3581,9 +3565,10 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, | |||
3581 | 3565 | ||
3582 | i = rx_ring->next_to_clean; | 3566 | i = rx_ring->next_to_clean; |
3583 | rx_desc = E1000_RX_DESC(*rx_ring, i); | 3567 | rx_desc = E1000_RX_DESC(*rx_ring, i); |
3568 | buffer_info = &rx_ring->buffer_info[i]; | ||
3584 | 3569 | ||
3585 | while(rx_desc->status & E1000_RXD_STAT_DD) { | 3570 | while (rx_desc->status & E1000_RXD_STAT_DD) { |
3586 | buffer_info = &rx_ring->buffer_info[i]; | 3571 | struct sk_buff *skb; |
3587 | u8 status; | 3572 | u8 status; |
3588 | #ifdef CONFIG_E1000_NAPI | 3573 | #ifdef CONFIG_E1000_NAPI |
3589 | if(*work_done >= work_to_do) | 3574 | if(*work_done >= work_to_do) |
@@ -3591,6 +3576,7 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, | |||
3591 | (*work_done)++; | 3576 | (*work_done)++; |
3592 | #endif | 3577 | #endif |
3593 | status = rx_desc->status; | 3578 | status = rx_desc->status; |
3579 | skb = buffer_info->skb; | ||
3594 | cleaned = TRUE; | 3580 | cleaned = TRUE; |
3595 | cleaned_count++; | 3581 | cleaned_count++; |
3596 | pci_unmap_single(pdev, | 3582 | pci_unmap_single(pdev, |
@@ -3598,20 +3584,50 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, | |||
3598 | buffer_info->length, | 3584 | buffer_info->length, |
3599 | PCI_DMA_FROMDEVICE); | 3585 | PCI_DMA_FROMDEVICE); |
3600 | 3586 | ||
3601 | skb = buffer_info->skb; | ||
3602 | length = le16_to_cpu(rx_desc->length); | 3587 | length = le16_to_cpu(rx_desc->length); |
3603 | 3588 | ||
3604 | if(unlikely(!(rx_desc->status & E1000_RXD_STAT_EOP))) { | 3589 | skb_put(skb, length); |
3605 | /* All receives must fit into a single buffer */ | 3590 | |
3606 | E1000_DBG("%s: Receive packet consumed multiple" | 3591 | if (!(status & E1000_RXD_STAT_EOP)) { |
3607 | " buffers\n", netdev->name); | 3592 | if (!rx_ring->rx_skb_top) { |
3608 | dev_kfree_skb_irq(skb); | 3593 | rx_ring->rx_skb_top = skb; |
3594 | rx_ring->rx_skb_top->len = length; | ||
3595 | rx_ring->rx_skb_prev = skb; | ||
3596 | } else { | ||
3597 | if (skb_shinfo(rx_ring->rx_skb_top)->frag_list) { | ||
3598 | rx_ring->rx_skb_prev->next = skb; | ||
3599 | skb->prev = rx_ring->rx_skb_prev; | ||
3600 | } else { | ||
3601 | skb_shinfo(rx_ring->rx_skb_top)->frag_list = skb; | ||
3602 | } | ||
3603 | rx_ring->rx_skb_prev = skb; | ||
3604 | rx_ring->rx_skb_top->data_len += length; | ||
3605 | } | ||
3609 | goto next_desc; | 3606 | goto next_desc; |
3607 | } else { | ||
3608 | if (rx_ring->rx_skb_top) { | ||
3609 | if (skb_shinfo(rx_ring->rx_skb_top) | ||
3610 | ->frag_list) { | ||
3611 | rx_ring->rx_skb_prev->next = skb; | ||
3612 | skb->prev = rx_ring->rx_skb_prev; | ||
3613 | } else | ||
3614 | skb_shinfo(rx_ring->rx_skb_top) | ||
3615 | ->frag_list = skb; | ||
3616 | |||
3617 | rx_ring->rx_skb_top->data_len += length; | ||
3618 | rx_ring->rx_skb_top->len += | ||
3619 | rx_ring->rx_skb_top->data_len; | ||
3620 | |||
3621 | skb = rx_ring->rx_skb_top; | ||
3622 | multi_descriptor = TRUE; | ||
3623 | rx_ring->rx_skb_top = NULL; | ||
3624 | rx_ring->rx_skb_prev = NULL; | ||
3625 | } | ||
3610 | } | 3626 | } |
3611 | 3627 | ||
3612 | if(unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { | 3628 | if(unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { |
3613 | last_byte = *(skb->data + length - 1); | 3629 | last_byte = *(skb->data + length - 1); |
3614 | if(TBI_ACCEPT(&adapter->hw, rx_desc->status, | 3630 | if (TBI_ACCEPT(&adapter->hw, status, |
3615 | rx_desc->errors, length, last_byte)) { | 3631 | rx_desc->errors, length, last_byte)) { |
3616 | spin_lock_irqsave(&adapter->stats_lock, flags); | 3632 | spin_lock_irqsave(&adapter->stats_lock, flags); |
3617 | e1000_tbi_adjust_stats(&adapter->hw, | 3633 | e1000_tbi_adjust_stats(&adapter->hw, |
@@ -3668,7 +3684,7 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, | |||
3668 | } | 3684 | } |
3669 | #else /* CONFIG_E1000_NAPI */ | 3685 | #else /* CONFIG_E1000_NAPI */ |
3670 | if(unlikely(adapter->vlgrp && | 3686 | if(unlikely(adapter->vlgrp && |
3671 | (rx_desc->status & E1000_RXD_STAT_VP))) { | 3687 | (status & E1000_RXD_STAT_VP))) { |
3672 | vlan_hwaccel_rx(skb, adapter->vlgrp, | 3688 | vlan_hwaccel_rx(skb, adapter->vlgrp, |
3673 | le16_to_cpu(rx_desc->special) & | 3689 | le16_to_cpu(rx_desc->special) & |
3674 | E1000_RXD_SPC_VLAN_MASK); | 3690 | E1000_RXD_SPC_VLAN_MASK); |
@@ -3795,12 +3811,8 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, | |||
3795 | skb->protocol = eth_type_trans(skb, netdev); | 3811 | skb->protocol = eth_type_trans(skb, netdev); |
3796 | 3812 | ||
3797 | if(likely(rx_desc->wb.upper.header_status & | 3813 | if(likely(rx_desc->wb.upper.header_status & |
3798 | E1000_RXDPS_HDRSTAT_HDRSP)) { | 3814 | E1000_RXDPS_HDRSTAT_HDRSP)) |
3799 | adapter->rx_hdr_split++; | 3815 | adapter->rx_hdr_split++; |
3800 | #ifdef HAVE_RX_ZERO_COPY | ||
3801 | skb_shinfo(skb)->zero_copy = TRUE; | ||
3802 | #endif | ||
3803 | } | ||
3804 | #ifdef CONFIG_E1000_NAPI | 3816 | #ifdef CONFIG_E1000_NAPI |
3805 | if(unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) { | 3817 | if(unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) { |
3806 | vlan_hwaccel_receive_skb(skb, adapter->vlgrp, | 3818 | vlan_hwaccel_receive_skb(skb, adapter->vlgrp, |
@@ -3940,20 +3952,22 @@ map_skb: | |||
3940 | rx_desc = E1000_RX_DESC(*rx_ring, i); | 3952 | rx_desc = E1000_RX_DESC(*rx_ring, i); |
3941 | rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); | 3953 | rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); |
3942 | 3954 | ||
3943 | if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) { | ||
3944 | /* Force memory writes to complete before letting h/w | ||
3945 | * know there are new descriptors to fetch. (Only | ||
3946 | * applicable for weak-ordered memory model archs, | ||
3947 | * such as IA-64). */ | ||
3948 | wmb(); | ||
3949 | writel(i, adapter->hw.hw_addr + rx_ring->rdt); | ||
3950 | } | ||
3951 | |||
3952 | if(unlikely(++i == rx_ring->count)) i = 0; | 3955 | if(unlikely(++i == rx_ring->count)) i = 0; |
3953 | buffer_info = &rx_ring->buffer_info[i]; | 3956 | buffer_info = &rx_ring->buffer_info[i]; |
3954 | } | 3957 | } |
3955 | 3958 | ||
3956 | rx_ring->next_to_use = i; | 3959 | if (likely(rx_ring->next_to_use != i)) { |
3960 | rx_ring->next_to_use = i; | ||
3961 | if (unlikely(i-- == 0)) | ||
3962 | i = (rx_ring->count - 1); | ||
3963 | |||
3964 | /* Force memory writes to complete before letting h/w | ||
3965 | * know there are new descriptors to fetch. (Only | ||
3966 | * applicable for weak-ordered memory model archs, | ||
3967 | * such as IA-64). */ | ||
3968 | wmb(); | ||
3969 | writel(i, adapter->hw.hw_addr + rx_ring->rdt); | ||
3970 | } | ||
3957 | } | 3971 | } |
3958 | 3972 | ||
3959 | /** | 3973 | /** |
@@ -3988,8 +4002,10 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, | |||
3988 | if (likely(!ps_page->ps_page[j])) { | 4002 | if (likely(!ps_page->ps_page[j])) { |
3989 | ps_page->ps_page[j] = | 4003 | ps_page->ps_page[j] = |
3990 | alloc_page(GFP_ATOMIC); | 4004 | alloc_page(GFP_ATOMIC); |
3991 | if (unlikely(!ps_page->ps_page[j])) | 4005 | if (unlikely(!ps_page->ps_page[j])) { |
4006 | adapter->alloc_rx_buff_failed++; | ||
3992 | goto no_buffers; | 4007 | goto no_buffers; |
4008 | } | ||
3993 | ps_page_dma->ps_page_dma[j] = | 4009 | ps_page_dma->ps_page_dma[j] = |
3994 | pci_map_page(pdev, | 4010 | pci_map_page(pdev, |
3995 | ps_page->ps_page[j], | 4011 | ps_page->ps_page[j], |
@@ -4008,8 +4024,10 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, | |||
4008 | 4024 | ||
4009 | skb = dev_alloc_skb(adapter->rx_ps_bsize0 + NET_IP_ALIGN); | 4025 | skb = dev_alloc_skb(adapter->rx_ps_bsize0 + NET_IP_ALIGN); |
4010 | 4026 | ||
4011 | if(unlikely(!skb)) | 4027 | if (unlikely(!skb)) { |
4028 | adapter->alloc_rx_buff_failed++; | ||
4012 | break; | 4029 | break; |
4030 | } | ||
4013 | 4031 | ||
4014 | /* Make buffer alignment 2 beyond a 16 byte boundary | 4032 | /* Make buffer alignment 2 beyond a 16 byte boundary |
4015 | * this will result in a 16 byte aligned IP header after | 4033 | * this will result in a 16 byte aligned IP header after |
@@ -4027,19 +4045,6 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, | |||
4027 | 4045 | ||
4028 | rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma); | 4046 | rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma); |
4029 | 4047 | ||
4030 | if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) { | ||
4031 | /* Force memory writes to complete before letting h/w | ||
4032 | * know there are new descriptors to fetch. (Only | ||
4033 | * applicable for weak-ordered memory model archs, | ||
4034 | * such as IA-64). */ | ||
4035 | wmb(); | ||
4036 | /* Hardware increments by 16 bytes, but packet split | ||
4037 | * descriptors are 32 bytes...so we increment tail | ||
4038 | * twice as much. | ||
4039 | */ | ||
4040 | writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt); | ||
4041 | } | ||
4042 | |||
4043 | if(unlikely(++i == rx_ring->count)) i = 0; | 4048 | if(unlikely(++i == rx_ring->count)) i = 0; |
4044 | buffer_info = &rx_ring->buffer_info[i]; | 4049 | buffer_info = &rx_ring->buffer_info[i]; |
4045 | ps_page = &rx_ring->ps_page[i]; | 4050 | ps_page = &rx_ring->ps_page[i]; |
@@ -4047,7 +4052,21 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, | |||
4047 | } | 4052 | } |
4048 | 4053 | ||
4049 | no_buffers: | 4054 | no_buffers: |
4050 | rx_ring->next_to_use = i; | 4055 | if (likely(rx_ring->next_to_use != i)) { |
4056 | rx_ring->next_to_use = i; | ||
4057 | if (unlikely(i-- == 0)) i = (rx_ring->count - 1); | ||
4058 | |||
4059 | /* Force memory writes to complete before letting h/w | ||
4060 | * know there are new descriptors to fetch. (Only | ||
4061 | * applicable for weak-ordered memory model archs, | ||
4062 | * such as IA-64). */ | ||
4063 | wmb(); | ||
4064 | /* Hardware increments by 16 bytes, but packet split | ||
4065 | * descriptors are 32 bytes...so we increment tail | ||
4066 | * twice as much. | ||
4067 | */ | ||
4068 | writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt); | ||
4069 | } | ||
4051 | } | 4070 | } |
4052 | 4071 | ||
4053 | /** | 4072 | /** |