diff options
| field | value | date |
|---|---|---|
| author | Michael Chan <mchan@broadcom.com> | 2006-08-08 00:46:02 -0400 |
| committer | David S. Miller <davem@davemloft.net> | 2006-08-08 00:46:02 -0400 |
| commit | 1b2a720506ccf7c30baaeda5d990c29b31e21726 (patch) | |
| tree | 5990f56e364077c83563566ed64beae95b855a9f /drivers | |
| parent | bd37a088596ccdb2b2dd3299e25e333bca7a9a34 (diff) | |
[TG3]: Fix tx race condition
Fix a subtle race condition between tg3_start_xmit() and tg3_tx()
discovered by Herbert Xu <herbert@gondor.apana.org.au>:
CPU0                                    CPU1
tg3_start_xmit()
    if (tx_ring_full) {
        tx_lock
                                        tg3_tx()
                                            if (!netif_queue_stopped)
        netif_stop_queue()
        if (!tx_ring_full)
                                            update_tx_ring
            netif_wake_queue()
        tx_unlock
    }
Even though tg3_tx() updates the tx ring (tx_cons) before its if statement
in program order, the CPU can re-order the update past the
netif_queue_stopped() check, as shown above. This scenario can cause the
tx queue to be stopped forever if tg3_tx() has just freed up the entire
tx_ring, because no further tx completion will arrive to wake the queue.
The possibility of this happening should be very small, though.
The following changes are made:
1. Add memory barrier to fix the above race condition.
2. Eliminate the private tx_lock altogether and rely solely on
netif_tx_lock. This eliminates one spinlock in tg3_start_xmit()
when the ring is full.
3. Because of 2, use netif_tx_lock in tg3_tx() before calling
netif_wake_queue().
4. Change TX_BUFFS_AVAIL to an inline function with a memory barrier.
Herbert and David suggested using the memory barrier instead of
volatile.
5. Check for the full wake queue condition before getting
netif_tx_lock in tg3_tx(). This reduces the number of unnecessary
spinlocks when the tx ring is full in a steady-state condition.
6. Update version to 3.65.
Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/tg3.c | 49 | ||||
-rw-r--r-- | drivers/net/tg3.h | 8 |
2 files changed, 31 insertions, 26 deletions
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 0afbed6753fa..eafabb253f08 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c | |||
@@ -68,8 +68,8 @@ | |||
68 | 68 | ||
69 | #define DRV_MODULE_NAME "tg3" | 69 | #define DRV_MODULE_NAME "tg3" |
70 | #define PFX DRV_MODULE_NAME ": " | 70 | #define PFX DRV_MODULE_NAME ": " |
71 | #define DRV_MODULE_VERSION "3.64" | 71 | #define DRV_MODULE_VERSION "3.65" |
72 | #define DRV_MODULE_RELDATE "July 31, 2006" | 72 | #define DRV_MODULE_RELDATE "August 07, 2006" |
73 | 73 | ||
74 | #define TG3_DEF_MAC_MODE 0 | 74 | #define TG3_DEF_MAC_MODE 0 |
75 | #define TG3_DEF_RX_MODE 0 | 75 | #define TG3_DEF_RX_MODE 0 |
@@ -123,9 +123,6 @@ | |||
123 | TG3_RX_RCB_RING_SIZE(tp)) | 123 | TG3_RX_RCB_RING_SIZE(tp)) |
124 | #define TG3_TX_RING_BYTES (sizeof(struct tg3_tx_buffer_desc) * \ | 124 | #define TG3_TX_RING_BYTES (sizeof(struct tg3_tx_buffer_desc) * \ |
125 | TG3_TX_RING_SIZE) | 125 | TG3_TX_RING_SIZE) |
126 | #define TX_BUFFS_AVAIL(TP) \ | ||
127 | ((TP)->tx_pending - \ | ||
128 | (((TP)->tx_prod - (TP)->tx_cons) & (TG3_TX_RING_SIZE - 1))) | ||
129 | #define NEXT_TX(N) (((N) + 1) & (TG3_TX_RING_SIZE - 1)) | 126 | #define NEXT_TX(N) (((N) + 1) & (TG3_TX_RING_SIZE - 1)) |
130 | 127 | ||
131 | #define RX_PKT_BUF_SZ (1536 + tp->rx_offset + 64) | 128 | #define RX_PKT_BUF_SZ (1536 + tp->rx_offset + 64) |
@@ -2987,6 +2984,13 @@ static void tg3_tx_recover(struct tg3 *tp) | |||
2987 | spin_unlock(&tp->lock); | 2984 | spin_unlock(&tp->lock); |
2988 | } | 2985 | } |
2989 | 2986 | ||
2987 | static inline u32 tg3_tx_avail(struct tg3 *tp) | ||
2988 | { | ||
2989 | smp_mb(); | ||
2990 | return (tp->tx_pending - | ||
2991 | ((tp->tx_prod - tp->tx_cons) & (TG3_TX_RING_SIZE - 1))); | ||
2992 | } | ||
2993 | |||
2990 | /* Tigon3 never reports partial packet sends. So we do not | 2994 | /* Tigon3 never reports partial packet sends. So we do not |
2991 | * need special logic to handle SKBs that have not had all | 2995 | * need special logic to handle SKBs that have not had all |
2992 | * of their frags sent yet, like SunGEM does. | 2996 | * of their frags sent yet, like SunGEM does. |
@@ -3038,12 +3042,20 @@ static void tg3_tx(struct tg3 *tp) | |||
3038 | 3042 | ||
3039 | tp->tx_cons = sw_idx; | 3043 | tp->tx_cons = sw_idx; |
3040 | 3044 | ||
3041 | if (unlikely(netif_queue_stopped(tp->dev))) { | 3045 | /* Need to make the tx_cons update visible to tg3_start_xmit() |
3042 | spin_lock(&tp->tx_lock); | 3046 | * before checking for netif_queue_stopped(). Without the |
3047 | * memory barrier, there is a small possibility that tg3_start_xmit() | ||
3048 | * will miss it and cause the queue to be stopped forever. | ||
3049 | */ | ||
3050 | smp_mb(); | ||
3051 | |||
3052 | if (unlikely(netif_queue_stopped(tp->dev) && | ||
3053 | (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH))) { | ||
3054 | netif_tx_lock(tp->dev); | ||
3043 | if (netif_queue_stopped(tp->dev) && | 3055 | if (netif_queue_stopped(tp->dev) && |
3044 | (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH)) | 3056 | (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH)) |
3045 | netif_wake_queue(tp->dev); | 3057 | netif_wake_queue(tp->dev); |
3046 | spin_unlock(&tp->tx_lock); | 3058 | netif_tx_unlock(tp->dev); |
3047 | } | 3059 | } |
3048 | } | 3060 | } |
3049 | 3061 | ||
@@ -3795,7 +3807,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
3795 | * interrupt. Furthermore, IRQ processing runs lockless so we have | 3807 | * interrupt. Furthermore, IRQ processing runs lockless so we have |
3796 | * no IRQ context deadlocks to worry about either. Rejoice! | 3808 | * no IRQ context deadlocks to worry about either. Rejoice! |
3797 | */ | 3809 | */ |
3798 | if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { | 3810 | if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { |
3799 | if (!netif_queue_stopped(dev)) { | 3811 | if (!netif_queue_stopped(dev)) { |
3800 | netif_stop_queue(dev); | 3812 | netif_stop_queue(dev); |
3801 | 3813 | ||
@@ -3891,12 +3903,10 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
3891 | tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry); | 3903 | tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry); |
3892 | 3904 | ||
3893 | tp->tx_prod = entry; | 3905 | tp->tx_prod = entry; |
3894 | if (unlikely(TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))) { | 3906 | if (unlikely(tg3_tx_avail(tp) <= (MAX_SKB_FRAGS + 1))) { |
3895 | spin_lock(&tp->tx_lock); | ||
3896 | netif_stop_queue(dev); | 3907 | netif_stop_queue(dev); |
3897 | if (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH) | 3908 | if (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH) |
3898 | netif_wake_queue(tp->dev); | 3909 | netif_wake_queue(tp->dev); |
3899 | spin_unlock(&tp->tx_lock); | ||
3900 | } | 3910 | } |
3901 | 3911 | ||
3902 | out_unlock: | 3912 | out_unlock: |
@@ -3918,7 +3928,7 @@ static int tg3_tso_bug(struct tg3 *tp, struct sk_buff *skb) | |||
3918 | struct sk_buff *segs, *nskb; | 3928 | struct sk_buff *segs, *nskb; |
3919 | 3929 | ||
3920 | /* Estimate the number of fragments in the worst case */ | 3930 | /* Estimate the number of fragments in the worst case */ |
3921 | if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->gso_segs * 3))) { | 3931 | if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->gso_segs * 3))) { |
3922 | netif_stop_queue(tp->dev); | 3932 | netif_stop_queue(tp->dev); |
3923 | return NETDEV_TX_BUSY; | 3933 | return NETDEV_TX_BUSY; |
3924 | } | 3934 | } |
@@ -3958,7 +3968,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) | |||
3958 | * interrupt. Furthermore, IRQ processing runs lockless so we have | 3968 | * interrupt. Furthermore, IRQ processing runs lockless so we have |
3959 | * no IRQ context deadlocks to worry about either. Rejoice! | 3969 | * no IRQ context deadlocks to worry about either. Rejoice! |
3960 | */ | 3970 | */ |
3961 | if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { | 3971 | if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { |
3962 | if (!netif_queue_stopped(dev)) { | 3972 | if (!netif_queue_stopped(dev)) { |
3963 | netif_stop_queue(dev); | 3973 | netif_stop_queue(dev); |
3964 | 3974 | ||
@@ -4108,12 +4118,10 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) | |||
4108 | tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry); | 4118 | tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry); |
4109 | 4119 | ||
4110 | tp->tx_prod = entry; | 4120 | tp->tx_prod = entry; |
4111 | if (unlikely(TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))) { | 4121 | if (unlikely(tg3_tx_avail(tp) <= (MAX_SKB_FRAGS + 1))) { |
4112 | spin_lock(&tp->tx_lock); | ||
4113 | netif_stop_queue(dev); | 4122 | netif_stop_queue(dev); |
4114 | if (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH) | 4123 | if (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH) |
4115 | netif_wake_queue(tp->dev); | 4124 | netif_wake_queue(tp->dev); |
4116 | spin_unlock(&tp->tx_lock); | ||
4117 | } | 4125 | } |
4118 | 4126 | ||
4119 | out_unlock: | 4127 | out_unlock: |
@@ -11472,7 +11480,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, | |||
11472 | tp->grc_mode |= GRC_MODE_BSWAP_NONFRM_DATA; | 11480 | tp->grc_mode |= GRC_MODE_BSWAP_NONFRM_DATA; |
11473 | #endif | 11481 | #endif |
11474 | spin_lock_init(&tp->lock); | 11482 | spin_lock_init(&tp->lock); |
11475 | spin_lock_init(&tp->tx_lock); | ||
11476 | spin_lock_init(&tp->indirect_lock); | 11483 | spin_lock_init(&tp->indirect_lock); |
11477 | INIT_WORK(&tp->reset_task, tg3_reset_task, tp); | 11484 | INIT_WORK(&tp->reset_task, tg3_reset_task, tp); |
11478 | 11485 | ||
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index ba2c98711c88..3ecf356cfb08 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h | |||
@@ -2079,9 +2079,9 @@ struct tg3 { | |||
2079 | * lock: Held during reset, PHY access, timer, and when | 2079 | * lock: Held during reset, PHY access, timer, and when |
2080 | * updating tg3_flags and tg3_flags2. | 2080 | * updating tg3_flags and tg3_flags2. |
2081 | * | 2081 | * |
2082 | * tx_lock: Held during tg3_start_xmit and tg3_tx only | 2082 | * netif_tx_lock: Held during tg3_start_xmit. tg3_tx holds |
2083 | * when calling netif_[start|stop]_queue. | 2083 | * netif_tx_lock when it needs to call |
2084 | * tg3_start_xmit is protected by netif_tx_lock. | 2084 | * netif_wake_queue. |
2085 | * | 2085 | * |
2086 | * Both of these locks are to be held with BH safety. | 2086 | * Both of these locks are to be held with BH safety. |
2087 | * | 2087 | * |
@@ -2118,8 +2118,6 @@ struct tg3 { | |||
2118 | u32 tx_cons; | 2118 | u32 tx_cons; |
2119 | u32 tx_pending; | 2119 | u32 tx_pending; |
2120 | 2120 | ||
2121 | spinlock_t tx_lock; | ||
2122 | |||
2123 | struct tg3_tx_buffer_desc *tx_ring; | 2121 | struct tg3_tx_buffer_desc *tx_ring; |
2124 | struct tx_ring_info *tx_buffers; | 2122 | struct tx_ring_info *tx_buffers; |
2125 | dma_addr_t tx_desc_mapping; | 2123 | dma_addr_t tx_desc_mapping; |