aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Alexander Duyck <alexander.h.duyck@intel.com> 2008-12-26 04:33:18 -0500
committer: David S. Miller <davem@davemloft.net> 2008-12-26 04:33:18 -0500
commit: 0e014cb16234c3797aa518d46fe7e1fe91ebcca9 (patch)
tree: 2fea0619cb67b0a0001fa1018042d089daa4fe72
parent: f2712fd0b4097e8385bdb75c0ebd285a057fe299 (diff)
igb: defeature tx head writeback
This patch removes tx head writeback as it was found to not provide a significant improvement in tx performance and on some systems has been seen to cause a performance degradation due to partial cache line writes. Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/igb/e1000_defines.h1
-rw-r--r--drivers/net/igb/igb.h5
-rw-r--r--drivers/net/igb/igb_main.c85
3 files changed, 39 insertions, 52 deletions
diff --git a/drivers/net/igb/e1000_defines.h b/drivers/net/igb/e1000_defines.h
index c5fe784c9e37..40d03426c122 100644
--- a/drivers/net/igb/e1000_defines.h
+++ b/drivers/net/igb/e1000_defines.h
@@ -323,6 +323,7 @@
323#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ 323#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
324#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ 324#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */
325#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ 325#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */
326#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */
326/* Extended desc bits for Linksec and timesync */ 327/* Extended desc bits for Linksec and timesync */
327 328
328/* Transmit Control */ 329/* Transmit Control */
diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h
index 2121b8bc6ea7..c90632524fda 100644
--- a/drivers/net/igb/igb.h
+++ b/drivers/net/igb/igb.h
@@ -43,8 +43,6 @@ struct igb_adapter;
43#endif 43#endif
44 44
45/* Interrupt defines */ 45/* Interrupt defines */
46#define IGB_MAX_TX_CLEAN 72
47
48#define IGB_MIN_DYN_ITR 3000 46#define IGB_MIN_DYN_ITR 3000
49#define IGB_MAX_DYN_ITR 96000 47#define IGB_MAX_DYN_ITR 96000
50 48
@@ -127,7 +125,8 @@ struct igb_buffer {
127 /* TX */ 125 /* TX */
128 struct { 126 struct {
129 unsigned long time_stamp; 127 unsigned long time_stamp;
130 u32 length; 128 u16 length;
129 u16 next_to_watch;
131 }; 130 };
132 /* RX */ 131 /* RX */
133 struct { 132 struct {
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 6a40d9486daf..4962cdfc507c 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -1575,8 +1575,7 @@ int igb_setup_tx_resources(struct igb_adapter *adapter,
1575 memset(tx_ring->buffer_info, 0, size); 1575 memset(tx_ring->buffer_info, 0, size);
1576 1576
1577 /* round up to nearest 4K */ 1577 /* round up to nearest 4K */
1578 tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc) 1578 tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc);
1579 + sizeof(u32);
1580 tx_ring->size = ALIGN(tx_ring->size, 4096); 1579 tx_ring->size = ALIGN(tx_ring->size, 4096);
1581 1580
1582 tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size, 1581 tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
@@ -1635,7 +1634,7 @@ static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
1635 **/ 1634 **/
1636static void igb_configure_tx(struct igb_adapter *adapter) 1635static void igb_configure_tx(struct igb_adapter *adapter)
1637{ 1636{
1638 u64 tdba, tdwba; 1637 u64 tdba;
1639 struct e1000_hw *hw = &adapter->hw; 1638 struct e1000_hw *hw = &adapter->hw;
1640 u32 tctl; 1639 u32 tctl;
1641 u32 txdctl, txctrl; 1640 u32 txdctl, txctrl;
@@ -1651,12 +1650,6 @@ static void igb_configure_tx(struct igb_adapter *adapter)
1651 tdba & 0x00000000ffffffffULL); 1650 tdba & 0x00000000ffffffffULL);
1652 wr32(E1000_TDBAH(i), tdba >> 32); 1651 wr32(E1000_TDBAH(i), tdba >> 32);
1653 1652
1654 tdwba = ring->dma + ring->count * sizeof(struct e1000_tx_desc);
1655 tdwba |= 1; /* enable head wb */
1656 wr32(E1000_TDWBAL(i),
1657 tdwba & 0x00000000ffffffffULL);
1658 wr32(E1000_TDWBAH(i), tdwba >> 32);
1659
1660 ring->head = E1000_TDH(i); 1653 ring->head = E1000_TDH(i);
1661 ring->tail = E1000_TDT(i); 1654 ring->tail = E1000_TDT(i);
1662 writel(0, hw->hw_addr + ring->tail); 1655 writel(0, hw->hw_addr + ring->tail);
@@ -2710,6 +2703,7 @@ static inline int igb_tso_adv(struct igb_adapter *adapter,
2710 context_desc->seqnum_seed = 0; 2703 context_desc->seqnum_seed = 0;
2711 2704
2712 buffer_info->time_stamp = jiffies; 2705 buffer_info->time_stamp = jiffies;
2706 buffer_info->next_to_watch = i;
2713 buffer_info->dma = 0; 2707 buffer_info->dma = 0;
2714 i++; 2708 i++;
2715 if (i == tx_ring->count) 2709 if (i == tx_ring->count)
@@ -2773,6 +2767,7 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
2773 cpu_to_le32(tx_ring->queue_index << 4); 2767 cpu_to_le32(tx_ring->queue_index << 4);
2774 2768
2775 buffer_info->time_stamp = jiffies; 2769 buffer_info->time_stamp = jiffies;
2770 buffer_info->next_to_watch = i;
2776 buffer_info->dma = 0; 2771 buffer_info->dma = 0;
2777 2772
2778 i++; 2773 i++;
@@ -2791,8 +2786,8 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
2791#define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR) 2786#define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
2792 2787
2793static inline int igb_tx_map_adv(struct igb_adapter *adapter, 2788static inline int igb_tx_map_adv(struct igb_adapter *adapter,
2794 struct igb_ring *tx_ring, 2789 struct igb_ring *tx_ring, struct sk_buff *skb,
2795 struct sk_buff *skb) 2790 unsigned int first)
2796{ 2791{
2797 struct igb_buffer *buffer_info; 2792 struct igb_buffer *buffer_info;
2798 unsigned int len = skb_headlen(skb); 2793 unsigned int len = skb_headlen(skb);
@@ -2806,6 +2801,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
2806 buffer_info->length = len; 2801 buffer_info->length = len;
2807 /* set time_stamp *before* dma to help avoid a possible race */ 2802 /* set time_stamp *before* dma to help avoid a possible race */
2808 buffer_info->time_stamp = jiffies; 2803 buffer_info->time_stamp = jiffies;
2804 buffer_info->next_to_watch = i;
2809 buffer_info->dma = pci_map_single(adapter->pdev, skb->data, len, 2805 buffer_info->dma = pci_map_single(adapter->pdev, skb->data, len,
2810 PCI_DMA_TODEVICE); 2806 PCI_DMA_TODEVICE);
2811 count++; 2807 count++;
@@ -2823,6 +2819,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
2823 BUG_ON(len >= IGB_MAX_DATA_PER_TXD); 2819 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
2824 buffer_info->length = len; 2820 buffer_info->length = len;
2825 buffer_info->time_stamp = jiffies; 2821 buffer_info->time_stamp = jiffies;
2822 buffer_info->next_to_watch = i;
2826 buffer_info->dma = pci_map_page(adapter->pdev, 2823 buffer_info->dma = pci_map_page(adapter->pdev,
2827 frag->page, 2824 frag->page,
2828 frag->page_offset, 2825 frag->page_offset,
@@ -2835,8 +2832,9 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
2835 i = 0; 2832 i = 0;
2836 } 2833 }
2837 2834
2838 i = (i == 0) ? tx_ring->count - 1 : i - 1; 2835 i = ((i == 0) ? tx_ring->count - 1 : i - 1);
2839 tx_ring->buffer_info[i].skb = skb; 2836 tx_ring->buffer_info[i].skb = skb;
2837 tx_ring->buffer_info[first].next_to_watch = i;
2840 2838
2841 return count; 2839 return count;
2842} 2840}
@@ -2943,6 +2941,7 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
2943 struct igb_ring *tx_ring) 2941 struct igb_ring *tx_ring)
2944{ 2942{
2945 struct igb_adapter *adapter = netdev_priv(netdev); 2943 struct igb_adapter *adapter = netdev_priv(netdev);
2944 unsigned int first;
2946 unsigned int tx_flags = 0; 2945 unsigned int tx_flags = 0;
2947 unsigned int len; 2946 unsigned int len;
2948 u8 hdr_len = 0; 2947 u8 hdr_len = 0;
@@ -2979,6 +2978,8 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
2979 if (skb->protocol == htons(ETH_P_IP)) 2978 if (skb->protocol == htons(ETH_P_IP))
2980 tx_flags |= IGB_TX_FLAGS_IPV4; 2979 tx_flags |= IGB_TX_FLAGS_IPV4;
2981 2980
2981 first = tx_ring->next_to_use;
2982
2982 tso = skb_is_gso(skb) ? igb_tso_adv(adapter, tx_ring, skb, tx_flags, 2983 tso = skb_is_gso(skb) ? igb_tso_adv(adapter, tx_ring, skb, tx_flags,
2983 &hdr_len) : 0; 2984 &hdr_len) : 0;
2984 2985
@@ -2994,7 +2995,7 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
2994 tx_flags |= IGB_TX_FLAGS_CSUM; 2995 tx_flags |= IGB_TX_FLAGS_CSUM;
2995 2996
2996 igb_tx_queue_adv(adapter, tx_ring, tx_flags, 2997 igb_tx_queue_adv(adapter, tx_ring, tx_flags,
2997 igb_tx_map_adv(adapter, tx_ring, skb), 2998 igb_tx_map_adv(adapter, tx_ring, skb, first),
2998 skb->len, hdr_len); 2999 skb->len, hdr_len);
2999 3000
3000 netdev->trans_start = jiffies; 3001 netdev->trans_start = jiffies;
@@ -3617,12 +3618,6 @@ static int igb_clean_rx_ring_msix(struct napi_struct *napi, int budget)
3617 return 1; 3618 return 1;
3618} 3619}
3619 3620
3620static inline u32 get_head(struct igb_ring *tx_ring)
3621{
3622 void *end = (struct e1000_tx_desc *)tx_ring->desc + tx_ring->count;
3623 return le32_to_cpu(*(volatile __le32 *)end);
3624}
3625
3626/** 3621/**
3627 * igb_clean_tx_irq - Reclaim resources after transmit completes 3622 * igb_clean_tx_irq - Reclaim resources after transmit completes
3628 * @adapter: board private structure 3623 * @adapter: board private structure
@@ -3631,24 +3626,25 @@ static inline u32 get_head(struct igb_ring *tx_ring)
3631static bool igb_clean_tx_irq(struct igb_ring *tx_ring) 3626static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
3632{ 3627{
3633 struct igb_adapter *adapter = tx_ring->adapter; 3628 struct igb_adapter *adapter = tx_ring->adapter;
3634 struct e1000_hw *hw = &adapter->hw;
3635 struct net_device *netdev = adapter->netdev; 3629 struct net_device *netdev = adapter->netdev;
3636 struct e1000_tx_desc *tx_desc; 3630 struct e1000_hw *hw = &adapter->hw;
3637 struct igb_buffer *buffer_info; 3631 struct igb_buffer *buffer_info;
3638 struct sk_buff *skb; 3632 struct sk_buff *skb;
3639 unsigned int i; 3633 union e1000_adv_tx_desc *tx_desc, *eop_desc;
3640 u32 head, oldhead;
3641 unsigned int count = 0;
3642 unsigned int total_bytes = 0, total_packets = 0; 3634 unsigned int total_bytes = 0, total_packets = 0;
3643 bool retval = true; 3635 unsigned int i, eop, count = 0;
3636 bool cleaned = false;
3644 3637
3645 rmb();
3646 head = get_head(tx_ring);
3647 i = tx_ring->next_to_clean; 3638 i = tx_ring->next_to_clean;
3648 while (1) { 3639 eop = tx_ring->buffer_info[i].next_to_watch;
3649 while (i != head) { 3640 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
3650 tx_desc = E1000_TX_DESC(*tx_ring, i); 3641
3642 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
3643 (count < tx_ring->count)) {
3644 for (cleaned = false; !cleaned; count++) {
3645 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3651 buffer_info = &tx_ring->buffer_info[i]; 3646 buffer_info = &tx_ring->buffer_info[i];
3647 cleaned = (i == eop);
3652 skb = buffer_info->skb; 3648 skb = buffer_info->skb;
3653 3649
3654 if (skb) { 3650 if (skb) {
@@ -3663,25 +3659,17 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
3663 } 3659 }
3664 3660
3665 igb_unmap_and_free_tx_resource(adapter, buffer_info); 3661 igb_unmap_and_free_tx_resource(adapter, buffer_info);
3662 tx_desc->wb.status = 0;
3666 3663
3667 i++; 3664 i++;
3668 if (i == tx_ring->count) 3665 if (i == tx_ring->count)
3669 i = 0; 3666 i = 0;
3670
3671 count++;
3672 if (count == IGB_MAX_TX_CLEAN) {
3673 retval = false;
3674 goto done_cleaning;
3675 }
3676 } 3667 }
3677 oldhead = head; 3668
3678 rmb(); 3669 eop = tx_ring->buffer_info[i].next_to_watch;
3679 head = get_head(tx_ring); 3670 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
3680 if (head == oldhead) 3671 }
3681 goto done_cleaning; 3672
3682 } /* while (1) */
3683
3684done_cleaning:
3685 tx_ring->next_to_clean = i; 3673 tx_ring->next_to_clean = i;
3686 3674
3687 if (unlikely(count && 3675 if (unlikely(count &&
@@ -3708,7 +3696,6 @@ done_cleaning:
3708 && !(rd32(E1000_STATUS) & 3696 && !(rd32(E1000_STATUS) &
3709 E1000_STATUS_TXOFF)) { 3697 E1000_STATUS_TXOFF)) {
3710 3698
3711 tx_desc = E1000_TX_DESC(*tx_ring, i);
3712 /* detected Tx unit hang */ 3699 /* detected Tx unit hang */
3713 dev_err(&adapter->pdev->dev, 3700 dev_err(&adapter->pdev->dev,
3714 "Detected Tx Unit Hang\n" 3701 "Detected Tx Unit Hang\n"
@@ -3717,9 +3704,9 @@ done_cleaning:
3717 " TDT <%x>\n" 3704 " TDT <%x>\n"
3718 " next_to_use <%x>\n" 3705 " next_to_use <%x>\n"
3719 " next_to_clean <%x>\n" 3706 " next_to_clean <%x>\n"
3720 " head (WB) <%x>\n"
3721 "buffer_info[next_to_clean]\n" 3707 "buffer_info[next_to_clean]\n"
3722 " time_stamp <%lx>\n" 3708 " time_stamp <%lx>\n"
3709 " next_to_watch <%x>\n"
3723 " jiffies <%lx>\n" 3710 " jiffies <%lx>\n"
3724 " desc.status <%x>\n", 3711 " desc.status <%x>\n",
3725 tx_ring->queue_index, 3712 tx_ring->queue_index,
@@ -3727,10 +3714,10 @@ done_cleaning:
3727 readl(adapter->hw.hw_addr + tx_ring->tail), 3714 readl(adapter->hw.hw_addr + tx_ring->tail),
3728 tx_ring->next_to_use, 3715 tx_ring->next_to_use,
3729 tx_ring->next_to_clean, 3716 tx_ring->next_to_clean,
3730 head,
3731 tx_ring->buffer_info[i].time_stamp, 3717 tx_ring->buffer_info[i].time_stamp,
3718 eop,
3732 jiffies, 3719 jiffies,
3733 tx_desc->upper.fields.status); 3720 eop_desc->wb.status);
3734 netif_stop_subqueue(netdev, tx_ring->queue_index); 3721 netif_stop_subqueue(netdev, tx_ring->queue_index);
3735 } 3722 }
3736 } 3723 }
@@ -3740,7 +3727,7 @@ done_cleaning:
3740 tx_ring->tx_stats.packets += total_packets; 3727 tx_ring->tx_stats.packets += total_packets;
3741 adapter->net_stats.tx_bytes += total_bytes; 3728 adapter->net_stats.tx_bytes += total_bytes;
3742 adapter->net_stats.tx_packets += total_packets; 3729 adapter->net_stats.tx_packets += total_packets;
3743 return retval; 3730 return (count < tx_ring->count);
3744} 3731}
3745 3732
3746#ifdef CONFIG_IGB_LRO 3733#ifdef CONFIG_IGB_LRO