author     Alexander Duyck <alexander.h.duyck@intel.com>    2008-12-26 04:33:18 -0500
committer  David S. Miller <davem@davemloft.net>            2008-12-26 04:33:18 -0500
commit     0e014cb16234c3797aa518d46fe7e1fe91ebcca9
tree       2fea0619cb67b0a0001fa1018042d089daa4fe72
parent     f2712fd0b4097e8385bdb75c0ebd285a057fe299
igb: defeature tx head writeback
This patch removes tx head writeback, as it was found not to provide a
significant improvement in tx performance and on some systems was seen to
cause a performance degradation due to partial cache line writes.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
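Background on the two mechanisms touched here: with head writeback enabled, the adapter periodically DMA-writes its transmit head index into a word reserved just past the end of the descriptor ring (the extra sizeof(u32) in igb_setup_tx_resources and the E1000_TDWBAL/E1000_TDWBAH programming removed below), and igb_clean_tx_irq() read that word to learn how far the hardware had advanced. Those small, frequent writes land as partial cache line updates, which is the degradation the message above refers to. With this patch the driver instead relies on the Descriptor Done bit (the newly exposed E1000_TXD_STAT_DD) that the hardware writes back into the final descriptor of each packet. A rough user-space sketch of the two completion checks follows; everything in it is an illustrative stand-in rather than driver code:

/*
 * Sketch only -- not igb code.  Models the two ways a driver can learn that
 * transmit descriptors have completed:
 *   - head writeback: the NIC DMA-writes its head index into a word placed
 *     just past the descriptor ring, and the driver reads that word;
 *   - DD polling: the NIC sets a Descriptor Done bit in the completed
 *     descriptor itself, and the driver tests that bit.
 */
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8
#define STAT_DD   0x00000001u             /* stand-in for E1000_TXD_STAT_DD */

struct tx_desc { uint32_t status; };      /* simplified descriptor */

/* Ring storage plus one trailing word used by the head-writeback scheme. */
static struct tx_desc ring[RING_SIZE + 1];

static uint32_t read_hw_head(void)        /* old scheme: read the DMA'd head */
{
        return *(volatile uint32_t *)&ring[RING_SIZE];
}

static int desc_done(unsigned int i)      /* new scheme: poll DD per descriptor */
{
        return (ring[i].status & STAT_DD) != 0;
}

int main(void)
{
        /* Pretend the NIC has finished descriptors 0..2. */
        ring[0].status = ring[1].status = ring[2].status = STAT_DD;
        *(uint32_t *)&ring[RING_SIZE] = 3; /* the head-writeback word */

        printf("head writeback reports head = %u\n", (unsigned)read_hw_head());
        printf("DD polling: desc 2 done = %d, desc 3 done = %d\n",
               desc_done(2), desc_done(3));
        return 0;
}

Both views report the same progress; the practical difference is where the completion information lands and how those writes interact with the CPU cache.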
 drivers/net/igb/e1000_defines.h |  1
 drivers/net/igb/igb.h           |  5
 drivers/net/igb/igb_main.c      | 85
 3 files changed, 39 insertions, 52 deletions
diff --git a/drivers/net/igb/e1000_defines.h b/drivers/net/igb/e1000_defines.h
index c5fe784c9e37..40d03426c122 100644
--- a/drivers/net/igb/e1000_defines.h
+++ b/drivers/net/igb/e1000_defines.h
@@ -323,6 +323,7 @@
 #define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
 #define E1000_TXD_CMD_RS 0x08000000 /* Report Status */
 #define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */
+#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */
 /* Extended desc bits for Linksec and timesync */
 
 /* Transmit Control */
diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h
index 2121b8bc6ea7..c90632524fda 100644
--- a/drivers/net/igb/igb.h
+++ b/drivers/net/igb/igb.h
@@ -43,8 +43,6 @@ struct igb_adapter;
 #endif
 
 /* Interrupt defines */
-#define IGB_MAX_TX_CLEAN 72
-
 #define IGB_MIN_DYN_ITR 3000
 #define IGB_MAX_DYN_ITR 96000
 
@@ -127,7 +125,8 @@ struct igb_buffer {
 /* TX */
 struct {
 unsigned long time_stamp;
-u32 length;
+u16 length;
+u16 next_to_watch;
 };
 /* RX */
 struct {
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 6a40d9486daf..4962cdfc507c 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -1575,8 +1575,7 @@ int igb_setup_tx_resources(struct igb_adapter *adapter,
 memset(tx_ring->buffer_info, 0, size);
 
 /* round up to nearest 4K */
-tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc)
-+ sizeof(u32);
+tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc);
 tx_ring->size = ALIGN(tx_ring->size, 4096);
 
 tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
@@ -1635,7 +1634,7 @@ static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
 **/
 static void igb_configure_tx(struct igb_adapter *adapter)
 {
-u64 tdba, tdwba;
+u64 tdba;
 struct e1000_hw *hw = &adapter->hw;
 u32 tctl;
 u32 txdctl, txctrl;
@@ -1651,12 +1650,6 @@ static void igb_configure_tx(struct igb_adapter *adapter)
 tdba & 0x00000000ffffffffULL);
 wr32(E1000_TDBAH(i), tdba >> 32);
 
-tdwba = ring->dma + ring->count * sizeof(struct e1000_tx_desc);
-tdwba |= 1; /* enable head wb */
-wr32(E1000_TDWBAL(i),
-tdwba & 0x00000000ffffffffULL);
-wr32(E1000_TDWBAH(i), tdwba >> 32);
-
 ring->head = E1000_TDH(i);
 ring->tail = E1000_TDT(i);
 writel(0, hw->hw_addr + ring->tail);
@@ -2710,6 +2703,7 @@ static inline int igb_tso_adv(struct igb_adapter *adapter,
 context_desc->seqnum_seed = 0;
 
 buffer_info->time_stamp = jiffies;
+buffer_info->next_to_watch = i;
 buffer_info->dma = 0;
 i++;
 if (i == tx_ring->count)
@@ -2773,6 +2767,7 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
 cpu_to_le32(tx_ring->queue_index << 4);
 
 buffer_info->time_stamp = jiffies;
+buffer_info->next_to_watch = i;
 buffer_info->dma = 0;
 
 i++;
@@ -2791,8 +2786,8 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
 
 static inline int igb_tx_map_adv(struct igb_adapter *adapter,
-struct igb_ring *tx_ring,
-struct sk_buff *skb)
+struct igb_ring *tx_ring, struct sk_buff *skb,
+unsigned int first)
 {
 struct igb_buffer *buffer_info;
 unsigned int len = skb_headlen(skb);
@@ -2806,6 +2801,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
 buffer_info->length = len;
 /* set time_stamp *before* dma to help avoid a possible race */
 buffer_info->time_stamp = jiffies;
+buffer_info->next_to_watch = i;
 buffer_info->dma = pci_map_single(adapter->pdev, skb->data, len,
 PCI_DMA_TODEVICE);
 count++;
@@ -2823,6 +2819,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
 buffer_info->length = len;
 buffer_info->time_stamp = jiffies;
+buffer_info->next_to_watch = i;
 buffer_info->dma = pci_map_page(adapter->pdev,
 frag->page,
 frag->page_offset,
@@ -2835,8 +2832,9 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
 i = 0;
 }
 
-i = (i == 0) ? tx_ring->count - 1 : i - 1;
+i = ((i == 0) ? tx_ring->count - 1 : i - 1);
 tx_ring->buffer_info[i].skb = skb;
+tx_ring->buffer_info[first].next_to_watch = i;
 
 return count;
 }
@@ -2943,6 +2941,7 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
 struct igb_ring *tx_ring)
 {
 struct igb_adapter *adapter = netdev_priv(netdev);
+unsigned int first;
 unsigned int tx_flags = 0;
 unsigned int len;
 u8 hdr_len = 0;
@@ -2979,6 +2978,8 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
 if (skb->protocol == htons(ETH_P_IP))
 tx_flags |= IGB_TX_FLAGS_IPV4;
 
+first = tx_ring->next_to_use;
+
 tso = skb_is_gso(skb) ? igb_tso_adv(adapter, tx_ring, skb, tx_flags,
 &hdr_len) : 0;
 
@@ -2994,7 +2995,7 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
 tx_flags |= IGB_TX_FLAGS_CSUM;
 
 igb_tx_queue_adv(adapter, tx_ring, tx_flags,
-igb_tx_map_adv(adapter, tx_ring, skb),
+igb_tx_map_adv(adapter, tx_ring, skb, first),
 skb->len, hdr_len);
 
 netdev->trans_start = jiffies;
@@ -3617,12 +3618,6 @@ static int igb_clean_rx_ring_msix(struct napi_struct *napi, int budget)
 return 1;
 }
 
-static inline u32 get_head(struct igb_ring *tx_ring)
-{
-void *end = (struct e1000_tx_desc *)tx_ring->desc + tx_ring->count;
-return le32_to_cpu(*(volatile __le32 *)end);
-}
-
 /**
 * igb_clean_tx_irq - Reclaim resources after transmit completes
 * @adapter: board private structure
@@ -3631,24 +3626,25 @@ static inline u32 get_head(struct igb_ring *tx_ring)
 static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
 {
 struct igb_adapter *adapter = tx_ring->adapter;
-struct e1000_hw *hw = &adapter->hw;
 struct net_device *netdev = adapter->netdev;
-struct e1000_tx_desc *tx_desc;
+struct e1000_hw *hw = &adapter->hw;
 struct igb_buffer *buffer_info;
 struct sk_buff *skb;
-unsigned int i;
-u32 head, oldhead;
-unsigned int count = 0;
+union e1000_adv_tx_desc *tx_desc, *eop_desc;
 unsigned int total_bytes = 0, total_packets = 0;
-bool retval = true;
+unsigned int i, eop, count = 0;
+bool cleaned = false;
 
-rmb();
-head = get_head(tx_ring);
 i = tx_ring->next_to_clean;
-while (1) {
-while (i != head) {
-tx_desc = E1000_TX_DESC(*tx_ring, i);
+eop = tx_ring->buffer_info[i].next_to_watch;
+eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
+
+while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
+(count < tx_ring->count)) {
+for (cleaned = false; !cleaned; count++) {
+tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
 buffer_info = &tx_ring->buffer_info[i];
+cleaned = (i == eop);
 skb = buffer_info->skb;
 
 if (skb) {
@@ -3663,25 +3659,17 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
 }
 
 igb_unmap_and_free_tx_resource(adapter, buffer_info);
+tx_desc->wb.status = 0;
 
 i++;
 if (i == tx_ring->count)
 i = 0;
-
-count++;
-if (count == IGB_MAX_TX_CLEAN) {
-retval = false;
-goto done_cleaning;
-}
 }
-oldhead = head;
-rmb();
-head = get_head(tx_ring);
-if (head == oldhead)
-goto done_cleaning;
-} /* while (1) */
-
-done_cleaning:
+
+eop = tx_ring->buffer_info[i].next_to_watch;
+eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
+}
+
 tx_ring->next_to_clean = i;
 
 if (unlikely(count &&
@@ -3708,7 +3696,6 @@ done_cleaning:
 && !(rd32(E1000_STATUS) &
 E1000_STATUS_TXOFF)) {
 
-tx_desc = E1000_TX_DESC(*tx_ring, i);
 /* detected Tx unit hang */
 dev_err(&adapter->pdev->dev,
 "Detected Tx Unit Hang\n"
@@ -3717,9 +3704,9 @@ done_cleaning:
 " TDT <%x>\n"
 " next_to_use <%x>\n"
 " next_to_clean <%x>\n"
-" head (WB) <%x>\n"
 "buffer_info[next_to_clean]\n"
 " time_stamp <%lx>\n"
+" next_to_watch <%x>\n"
 " jiffies <%lx>\n"
 " desc.status <%x>\n",
 tx_ring->queue_index,
@@ -3727,10 +3714,10 @@ done_cleaning:
 readl(adapter->hw.hw_addr + tx_ring->tail),
 tx_ring->next_to_use,
 tx_ring->next_to_clean,
-head,
 tx_ring->buffer_info[i].time_stamp,
+eop,
 jiffies,
-tx_desc->upper.fields.status);
+eop_desc->wb.status);
 netif_stop_subqueue(netdev, tx_ring->queue_index);
 }
 }
@@ -3740,7 +3727,7 @@ done_cleaning:
 tx_ring->tx_stats.packets += total_packets;
 adapter->net_stats.tx_bytes += total_bytes;
 adapter->net_stats.tx_packets += total_packets;
-return retval;
+return (count < tx_ring->count);
 }
 
 #ifdef CONFIG_IGB_LRO
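The reworked igb_clean_tx_irq() above walks packets rather than raw descriptors: each tx buffer_info now records next_to_watch, the index of the last descriptor of its packet, and cleanup advances only while that descriptor's DD bit is set and fewer than tx_ring->count entries have been processed, replacing both the head read and the old IGB_MAX_TX_CLEAN budget. A standalone, simplified model of that loop shape follows; its types and helpers are invented for illustration and are not the driver's own:

/* Simplified model of the DD/next_to_watch cleanup walk; not igb code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define STAT_DD 0x00000001u
#define COUNT   8

struct desc { uint32_t status; };             /* stands in for wb.status */
struct buf  { unsigned int next_to_watch; };  /* per-buffer EOP index */

static struct desc descs[COUNT];
static struct buf  bufs[COUNT];

/* Returns the new next_to_clean after reclaiming completed packets. */
static unsigned int clean_ring(unsigned int next_to_clean)
{
        unsigned int i = next_to_clean, count = 0;
        unsigned int eop = bufs[i].next_to_watch;

        while ((descs[eop].status & STAT_DD) && count < COUNT) {
                bool cleaned = false;

                for (; !cleaned; count++) {    /* free the whole packet */
                        cleaned = (i == eop);
                        descs[i].status = 0;   /* like tx_desc->wb.status = 0 */
                        if (++i == COUNT)
                                i = 0;
                }
                eop = bufs[i].next_to_watch;   /* next packet's EOP descriptor */
        }
        return i;
}

int main(void)
{
        /* Two packets queued: descriptors 0-1 and descriptor 2.  Only the
         * first has completed (DD set on its EOP, descriptor 1). */
        bufs[0].next_to_watch = 1;
        bufs[2].next_to_watch = 2;
        descs[1].status = STAT_DD;

        printf("next_to_clean advances from 0 to %u\n", clean_ring(0));
        return 0;
}

Run standalone this prints 2: the completed two-descriptor packet is reclaimed while the still-pending packet at descriptor 2 is left untouched, which is exactly the boundary the DD test on eop_desc is meant to find.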